"""Gradio chat UI for the Lexora-Lite-3B demo Space: styling, layout and launch."""

import os
import subprocess
from threading import Thread
from typing import Iterator

import gradio as gr
import spaces
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

# Best-effort install of flash-attn at startup. The extra variable is merged
# into the current environment (rather than replacing it) so that PATH, HOME
# and friends remain visible to pip. The return code is deliberately not
# checked: a failed install must not prevent the app from starting.
subprocess.run(
    ["pip", "install", "flash-attn", "--no-build-isolation"],
    env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
    check=False,
)

# Stylesheet handed to gr.Blocks(css=...). It is written as adjacent string
# literals, one CSS rule per literal, so the resulting value is a single line.
CUSTOM_CSS = (
    " .container { max-width: 1000px !important; margin: auto !important; padding-top: 2rem !important; }"
    " .header-container { background: linear-gradient(135deg, #1e3a8a 0%, #3b82f6 100%); padding: 2rem; border-radius: 1rem; margin-bottom: 2rem; color: white; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06); }"
    " .model-info { background: white; padding: 1.5rem; border-radius: 0.5rem; margin-top: 1rem; box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1); }"
    " .chat-container { border: 1px solid #e5e7eb; border-radius: 1rem; background: white; box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1); }"
    " .message { padding: 1rem; margin: 0.5rem; border-radius: 0.5rem; }"
    " .user-message { background: #f3f4f6; }"
    " .assistant-message { background: #dbeafe; }"
    " .controls-container { background: #f8fafc; padding: 1.5rem; border-radius: 0.5rem; margin-top: 1rem; }"
    " .slider-label { font-weight: 600; color: #374151; }"
    " .duplicate-button { background: #2563eb !important; color: white !important; padding: 0.75rem 1.5rem !important; border-radius: 0.5rem !important; font-weight: 600 !important; transition: all 0.2s !important; }"
    " .duplicate-button:hover { background: #1d4ed8 !important; transform: translateY(-1px) !important; } "
)

# Markdown shown at the top of the page.
DESCRIPTION = '''

Lexora-Lite-3B

About the Model

This Space demonstrates Lexora-Lite-3B Chat ITA, currently the best open-source large language model for the Italian language. Compare its performance with other models on the official leaderboard.

'''

# Rest of your existing code remains the same until the Blocks creation
# NOTE: the UI below references `generate`, `MAX_MAX_NEW_TOKENS` and
# `DEFAULT_MAX_NEW_TOKENS`; none of them is defined in this part of the file,
# so they must be defined here, before the Blocks are built.

with gr.Blocks(
    css=CUSTOM_CSS,
    theme=gr.themes.Soft(
        primary_hue="blue",
        secondary_hue="blue",
        neutral_hue="slate",
        font=gr.themes.GoogleFont("Inter"),
        radius_size=gr.themes.sizes.radius_sm,
    ),
) as demo:
    with gr.Column(elem_classes="container"):
        gr.Markdown(DESCRIPTION)

        with gr.Column(elem_classes="chat-container"):
            # Created unrendered so that ChatInterface places it itself.
            system_message = gr.Textbox(
                value="",
                label="System Message",
                elem_classes="system-message",
                render=False,
            )

            # A layout context cannot appear inside a list literal, so the
            # sliders are created (and rendered) inside the styled column
            # first and the component objects are passed to ChatInterface.
            with gr.Column(elem_classes="controls-container"):
                max_new_tokens = gr.Slider(
                    label="Maximum New Tokens",
                    minimum=1,
                    maximum=MAX_MAX_NEW_TOKENS,
                    step=1,
                    value=DEFAULT_MAX_NEW_TOKENS,
                    elem_classes="slider-label",
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0,
                    maximum=4.0,
                    step=0.1,
                    value=0.001,
                    elem_classes="slider-label",
                )
                top_p = gr.Slider(
                    label="Top-p (Nucleus Sampling)",
                    minimum=0.05,
                    maximum=1.0,
                    step=0.05,
                    value=1.0,
                    elem_classes="slider-label",
                )
                top_k = gr.Slider(
                    label="Top-k",
                    minimum=1,
                    maximum=1000,
                    step=1,
                    value=50,
                    elem_classes="slider-label",
                )
                repetition_penalty = gr.Slider(
                    label="Repetition Penalty",
                    minimum=1.0,
                    maximum=2.0,
                    step=0.05,
                    value=1.0,
                    elem_classes="slider-label",
                )

            # The order of additional_inputs is the order in which the values
            # are passed to `generate` after the message and the history.
            chat_interface = gr.ChatInterface(
                fn=generate,
                additional_inputs=[
                    system_message,
                    max_new_tokens,
                    temperature,
                    top_p,
                    top_k,
                    repetition_penalty,
                ],
                examples=[
                    ["Ciao! Come stai?"],
                ],
                cache_examples=False,
            )

        gr.DuplicateButton(
            value="Duplicate Space for Private Use",
            elem_classes="duplicate-button",
            elem_id="duplicate-button",
        )

if __name__ == "__main__":
    demo.queue(max_size=20).launch()