#
# SPDX-FileCopyrightText: Hadad <hadad@linuxmail.org>
# SPDX-License-Identifier: Apache-2.0
#

import os  # Used for accessing environment variables
import gradio as gr  # Used to create the user interface

# Gradio user interface
with gr.Blocks(
    fill_height=True,  # Adjusting to the height of the user's screen
    fill_width=True  # Adjusting to the width of the user's screen
) as app:
    # Sidebar
    with gr.Sidebar():
        # Project description
        gr.HTML(
            """
            This Space runs the <b><a href=
            "https://huggingface.co/google/gemma-3-270m"
            target="_blank">Gemma 3 (270M)</a></b> model from
            <b>Google</b>, hosted on a server with <b>Ollama</b> and
            accessed via <b>OpenAI-style inference</b>.<br><br>
            The official <b>documentation</b> for using Ollama with
            OpenAI-style inference can be found
            <b><a href="https://ollama.com/blog/openai-compatibility"
            target="_blank">here</a></b>.<br><br>
            Gemma 3 (270M) runs entirely on <b>CPU</b>, using only a
            <b>single core</b>. This is sufficient because the model is
            small enough to operate efficiently on minimal
            hardware.<br><br>
            The Gemma 3 (270M) model can be viewed or downloaded from
            the official Ollama website,
            <b><a href="https://ollama.com/library/gemma3:270m"
            target="_blank">here</a></b>.<br><br>
            Gemma 3 has multimodal capabilities; however, running on
            CPU with a small parameter count may limit how well it
            handles such inputs, so the file upload functionality has
            been disabled.<br><br>
            <b>Like this project? Feel free to buy me a <a href=
            "https://ko-fi.com/hadad" target="_blank">
            coffee</a></b>.
            """
        )
    # Load chat interface
    gr.load_chat(
        os.getenv("OLLAMA_API_BASE_URL"),  # Endpoint
        token=os.getenv("OLLAMA_API_KEY"),  # API Key
        model="gemma3:270m",  # Model
        chatbot=gr.Chatbot(
            label="Ollama | Gemma 3 (270M)",  # Chatbot title
            type="messages",  # OpenAI-style messages format
            show_copy_button=True,  # Allow users to copy responses
            scale=1  # Standard display scaling
        ),
        file_types=None,  # Disable multimodal
        examples=[
            ["Please introduce yourself."],
            ["What caused World War II?"],
            ["Give me a short introduction to large language model."],
            ["Explain about quantum computers."]
        ],  # Provide sample inputs for users to try
        cache_examples=False,  # Ensure responses always fresh
        show_api=False  # Disable Gradio API
    )

# Start the app
app.launch(
    server_name="0.0.0.0",  # Listen on all network interfaces
    pwa=True  # Progressive Web App
)
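
# A minimal, optional sketch (not used by the app above): because Ollama
# exposes an OpenAI-compatible endpoint, the same gemma3:270m model can also
# be queried directly with the official openai Python client. This assumes
# OLLAMA_API_BASE_URL points at the OpenAI-style endpoint (for example
# "http://<host>:11434/v1") and that the openai package is installed. It is
# left commented out because app.launch() above blocks, so it would need to
# run as a separate script.
#
# from openai import OpenAI
#
# client = OpenAI(
#     base_url=os.getenv("OLLAMA_API_BASE_URL"),  # Same endpoint as the app
#     api_key=os.getenv("OLLAMA_API_KEY")  # Ollama accepts any non-empty key
# )
# response = client.chat.completions.create(
#     model="gemma3:270m",  # Same model served by Ollama
#     messages=[{"role": "user", "content": "Please introduce yourself."}]
# )
# print(response.choices[0].message.content)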