"""Gradio demo: caption an image with the remote Fuyu-8B Space.

The user uploads a picture; the app queries the hosted Fuyu-8B demo Space
for a detailed caption and shows it as a suggested LLM system prompt.
"""

import gradio as gr
from gradio_client import Client

# Remote Fuyu-8B demo Space used for image captioning.
# (Requires network access at import time.)
fuyu_client = Client("https://adept-fuyu-8b-demo.hf.space/")


def get_caption(image_in):
    """Return a detailed caption for *image_in* via the Fuyu-8B Space.

    Args:
        image_in: Filepath of the image to caption (the 'raw_image'
            Image component of the remote Space).

    Returns:
        The model's caption, truncated at its last period so it ends on
        a complete sentence. If the caption contains no period at all,
        the full text is returned unchanged (the original code returned
        an empty string in that case — `rfind` yields -1).
    """
    fuyu_result = fuyu_client.predict(
        image_in,  # str: filepath for the 'raw_image' Image component
        True,      # bool: 'Enable detailed captioning' checkbox
        fn_index=2,
    )
    # Truncate at the last sentence boundary; guard against the
    # no-period case where rfind returns -1.
    last_period_index = fuyu_result.rfind('.')
    if last_period_index != -1:
        truncated_caption = fuyu_result[:last_period_index + 1]
    else:
        truncated_caption = fuyu_result
    print(f"\n—\nIMAGE CAPTION: {truncated_caption}")
    return truncated_caption


def infer(image_in):
    """Gradio callback: caption the uploaded image and return the text."""
    gr.Info("Getting image caption with Fuyu...")
    user_prompt = get_caption(image_in)
    return user_prompt


# NOTE: the original assigned `title` with a trailing comma, which made it
# a one-element tuple instead of a string; fixed here. (It is not used in
# the layout below, but kept for reference.)
title = "LLM Agent from a Picture"
description = "Get a LLM system prompt from a picture so you can use it in GPT-Baker."

css = """
#col-container{
    margin: 0 auto;
    max-width: 780px;
    text-align: left;
}
"""

with gr.Blocks(css=css) as demo:
    with gr.Column(elem_id="col-container"):
        gr.HTML(f"""
{description}
""")
        with gr.Row():
            with gr.Column():
                image_in = gr.Image(
                    label="Image reference",
                    type="filepath",
                    elem_id="image-in",
                )
                # Typo fixed: "desciptions" -> "descriptions".
                submit_btn = gr.Button("Make descriptions of my pic !")
            with gr.Column():
                result = gr.Textbox(
                    label="Suggested System",
                    lines=6,
                    max_lines=30,
                    elem_id="suggested-system-prompt",
                )
        with gr.Row():
            gr.Examples(
                examples=[
                    ["examples/ponder.png"],
                    ["examples/ponder2.png"],
                ],
                fn=infer,
                inputs=[image_in],
                outputs=[result],
                cache_examples=True,
            )

    submit_btn.click(
        fn=infer,
        inputs=[image_in],
        outputs=[result],
    )

demo.queue().launch(show_api=False)