InternVL2.5 Expanding Performance Boundaries of Open-Source Multimodal Models with Model, Data, and Test-Time Scaling
[đ InternVL Blog] [đ Official Demo] [đ Quick Start] """ # .gradio-container {margin: 5px 10px 0 10px !important}; block_css = """ .gradio-container {margin: 0.1% 1% 0 1% !important; max-width: 98% !important;}; #buttons button { min-width: min(120px,100%); } .gradient-text { font-size: 28px; width: auto; font-weight: bold; background: linear-gradient(45deg, red, orange, yellow, green, blue, indigo, violet); background-clip: text; -webkit-background-clip: text; color: transparent; } .plain-text { font-size: 22px; width: auto; font-weight: bold; } """ js = """ function createWaveAnimation() { const text = document.getElementById('text'); var i = 0; setInterval(function() { const colors = [ 'red, orange, yellow, green, blue, indigo, violet, purple', 'orange, yellow, green, blue, indigo, violet, purple, red', 'yellow, green, blue, indigo, violet, purple, red, orange', 'green, blue, indigo, violet, purple, red, orange, yellow', 'blue, indigo, violet, purple, red, orange, yellow, green', 'indigo, violet, purple, red, orange, yellow, green, blue', 'violet, purple, red, orange, yellow, green, blue, indigo', 'purple, red, orange, yellow, green, blue, indigo, violet', ]; const angle = 45; const colorIndex = i % colors.length; text.style.background = `linear-gradient(${angle}deg, ${colors[colorIndex]})`; text.style.webkitBackgroundClip = 'text'; text.style.backgroundClip = 'text'; text.style.color = 'transparent'; text.style.fontSize = '28px'; text.style.width = 'auto'; text.textContent = 'InternVL2'; text.style.fontWeight = 'bold'; i += 1; }, 200); const params = new URLSearchParams(window.location.search); url_params = Object.fromEntries(params); // console.log(url_params); // console.log('hello world...'); // console.log(window.location.search); // console.log('hello world...'); // alert(window.location.search) // alert(url_params); return url_params; } """ def build_demo(): textbox = gr.MultimodalTextbox( interactive=True, file_types=["image", "video"], placeholder="Enter message or upload file...", show_label=False, ) with gr.Blocks( title="InternVL-Chat", theme=gr.themes.Default(), css=block_css, ) as demo: state = gr.State() with gr.Row(): with gr.Column(scale=2): # gr.Image('./gallery/logo-47b364d3.jpg') gr.HTML(title_html) with gr.Accordion("Settings", open=False) as setting_row: system_prompt = gr.Textbox( value="č¯ˇå°Ŋå¯čŊč¯Ļįģå°åįį¨æˇįéŽéĸã", label="System Prompt", interactive=True, ) temperature = gr.Slider( minimum=0.0, maximum=1.0, value=0.2, step=0.1, interactive=True, label="Temperature", ) top_p = gr.Slider( minimum=0.0, maximum=1.0, value=0.7, step=0.1, interactive=True, label="Top P", ) repetition_penalty = gr.Slider( minimum=1.0, maximum=1.5, value=1.1, step=0.02, interactive=True, label="Repetition penalty", ) max_output_tokens = gr.Slider( minimum=0, maximum=4096, value=1024, step=64, interactive=True, label="Max output tokens", ) max_input_tiles = gr.Slider( minimum=1, maximum=32, value=12, step=1, interactive=True, label="Max input tiles (control the image size)", ) examples = gr.Examples( examples=[ [ { "files": [ "gallery/14.jfif", ], "text": "Please help me analyze this picture.", } ], [ { "files": [ "gallery/1-2.PNG", ], "text": "Implement this flow chart using python", } ], [ { "files": [ "gallery/15.PNG", ], "text": "Please help me analyze this picture.", } ], ], inputs=[textbox], ) with gr.Column(scale=8): chatbot = gr.Chatbot( elem_id="chatbot", label="InternVL", height=580, show_copy_button=True, show_share_button=True, avatar_images=[ "assets/human.png", "assets/assistant.png", ], bubble_full_width=False, ) with gr.Row(): with gr.Column(scale=8): textbox.render() with gr.Column(scale=1, min_width=50): submit_btn = gr.Button(value="Send", variant="primary") with gr.Row(elem_id="buttons") as button_row: upvote_btn = gr.Button(value="đ Upvote", interactive=False) downvote_btn = gr.Button(value="đ Downvote", interactive=False) flag_btn = gr.Button(value="â ī¸ Flag", interactive=False) # stop_btn = gr.Button(value="âšī¸ Stop Generation", interactive=False) regenerate_btn = gr.Button( value="đ Regenerate", interactive=False ) clear_btn = gr.Button(value="đī¸ Clear", interactive=False) url_params = gr.JSON(visible=False) # Register listeners btn_list = [upvote_btn, downvote_btn, flag_btn, regenerate_btn, clear_btn] upvote_btn.click( upvote_last_response, [state], [textbox, upvote_btn, downvote_btn, flag_btn], ) downvote_btn.click( downvote_last_response, [state], [textbox, upvote_btn, downvote_btn, flag_btn], ) chatbot.like( vote_selected_response, [state], [], ) flag_btn.click( flag_last_response, [state], [textbox, upvote_btn, downvote_btn, flag_btn], ) regenerate_btn.click( regenerate, [state, system_prompt], [state, chatbot, textbox] + btn_list, ).then( http_bot, [ state, temperature, top_p, repetition_penalty, max_output_tokens, max_input_tiles, ], [state, chatbot, textbox] + btn_list, ) clear_btn.click(clear_history, None, [state, chatbot, textbox] + btn_list) textbox.submit( add_text, [state, textbox, system_prompt], [state, chatbot, textbox] + btn_list, ).then( http_bot, [ state, temperature, top_p, repetition_penalty, max_output_tokens, max_input_tiles, ], [state, chatbot, textbox] + btn_list, ) submit_btn.click( add_text, [state, textbox, system_prompt], [state, chatbot, textbox] + btn_list, ).then( http_bot, [ state, temperature, top_p, repetition_penalty, max_output_tokens, max_input_tiles, ], [state, chatbot, textbox] + btn_list, ) return demo if __name__ == "__main__": parser = argparse.ArgumentParser() parser.add_argument("--host", type=str, default="0.0.0.0") parser.add_argument("--port", type=int, default=7860) parser.add_argument("--concurrency-count", type=int, default=10) parser.add_argument("--share", action="store_true") parser.add_argument("--moderate", action="store_true") args = parser.parse_args() logger.info(f"args: {args}") logger.info(args) demo = build_demo() demo.queue(api_open=False).launch( server_name=args.host, server_port=args.port, share=args.share, max_threads=args.concurrency_count, )