|
|
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoProcessor, AutoTokenizer, tqdm
|
|
|
|
|
|
|
|
# Model identifiers: Florence-2 for vision tasks, Dolphin-Qwen for text generation.
FLORENCE_MODEL_ID = "microsoft/Florence-2-base"
DOLPHIN_MODEL_ID = "cognitivecomputations/dolphin-2.9.4-qwen2-1.5b"

print("Loading Florence-2...")
# trust_remote_code is required: Florence-2 ships custom modeling/processor code on the Hub.
florence_model = AutoModelForCausalLM.from_pretrained(FLORENCE_MODEL_ID, trust_remote_code=True).to("cpu").eval()
florence_processor = AutoProcessor.from_pretrained(FLORENCE_MODEL_ID, trust_remote_code=True)

print("Loading Dolphin-Qwen...")
dolphin_model = AutoModelForCausalLM.from_pretrained(DOLPHIN_MODEL_ID).to("cpu").eval()
# Fix: Dolphin-Qwen is a text-only causal LM — its tokenizer is loaded with
# AutoTokenizer. AutoProcessor is meant for multimodal processors and is the
# wrong class here (the variable is even named `dolphin_tokenizer`).
dolphin_tokenizer = AutoTokenizer.from_pretrained(DOLPHIN_MODEL_ID)
|
|
|
|
|
def process_ui_task(image, prompt):
    """Run Florence-2 object detection ("<OD>") on *image* and return the decoded output.

    Args:
        image: PIL image supplied by the Gradio Image component. Gradio passes
            None when the user submits without uploading an image.
        prompt: free-text instruction from the UI textbox.
            NOTE(review): currently unused — the Florence task is hard-coded
            to "<OD>" object detection; confirm whether the prompt should be
            routed to the Dolphin model.

    Returns:
        A "Brain Output: ..." string with the raw decoded model output, or an
        error message when no image was supplied.
    """
    # Guard: without this, a submit with no image crashes the processor below.
    if image is None:
        return "Error: please provide an image."

    inputs = florence_processor(text="<OD>", images=image, return_tensors="pt").to("cpu")

    # Inference only — disable autograd so no computation graph is built.
    with torch.no_grad():
        generated_ids = florence_model.generate(
            input_ids=inputs["input_ids"],
            pixel_values=inputs["pixel_values"],
            max_new_tokens=1024,
            do_sample=False,  # deterministic decoding
            num_beams=3,
        )

    # Keep special tokens: Florence-2 encodes detection results (labels/boxes)
    # as special <loc_*> tokens that skip_special_tokens=True would strip.
    results = florence_processor.batch_decode(generated_ids, skip_special_tokens=False)[0]
    return f"Brain Output: {results}"
|
|
|
|
|
|
|
|
# Gradio UI wiring: image + instruction in, text out.
# Fix: the Image component's type must be "pil" (the typo "pill" is not a
# valid value and makes Gradio raise at construction time); with "pil" the
# handler receives a PIL.Image instance.
interface = gr.Interface(
    fn=process_ui_task,
    inputs=[gr.Image(type="pil"), gr.Textbox(label="Instruction")],
    outputs="text",
    title="AI Automation Brain (Florence + Dolphin)",
)
|
|
|
|
|
if __name__ == "__main__":
    # Launch the Gradio web server only when run as a script, not on import.
    interface.launch()
|
|
|