import gradio as gr
import torch  # Needed directly only for the optional GPU lines below; transformers also requires it at runtime
from transformers import AutoModelForCausalLM, AutoTokenizer

# --- 1. Load a simple, small pre-trained LLM and its tokenizer ---
# We'll use DistilGPT2 for speed and small size.
# You can replace this with another small model if you prefer.
model_name = "distilgpt2"
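# Illustrative alternative (an assumption, not part of the original demo): "gpt2"
# (~124M parameters) also runs on CPU, with somewhat better output at the cost of speed.
# model_name = "gpt2"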
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name)
    # If you have a GPU, uncomment the next line
    # model.to("cuda" if torch.cuda.is_available() else "cpu")
    model_loaded = True
    print(f"Successfully loaded model and tokenizer for: {model_name}")
except Exception as e:
    print(f"Error loading model: {e}")
    model_loaded = False
    # Define a fallback function if the model fails to load, so the Gradio interface still launches
    def generate_text_from_llm(prompt_text):
        return "Error: Model could not be loaded. Please check the server logs."
    tokenizer = None  # May be undefined if loading failed early; setting it avoids a NameError below

if model_loaded and tokenizer:
    # Ensure pad_token is set if it's not already (important for generate)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = model.config.eos_token_id

    # --- 2. Define the LLM inference function ---
    def generate_text_from_llm(prompt_text):
        """
        Generates a short text continuation using the loaded LLM.
        """
        if not prompt_text:
            return "Please enter a starting prompt!"

        try:
            # Tokenize the prompt; keeping the attention mask avoids a warning,
            # since the pad token was set to the EOS token above.
            inputs = tokenizer(prompt_text, return_tensors="pt", truncation=True, max_length=512)
            # If you have a GPU, uncomment the next line
            # inputs = inputs.to("cuda" if torch.cuda.is_available() else "cpu")

            # Generate text
            # max_new_tokens caps how many new tokens are generated
            # num_return_sequences=1 means we want one completion
            # no_repeat_ngram_size helps avoid repetitive text
            outputs = model.generate(
                **inputs,
                max_new_tokens=50,                    # Generate up to 50 new tokens
                num_return_sequences=1,
                pad_token_id=tokenizer.eos_token_id,  # Use EOS token for padding during generation
                no_repeat_ngram_size=2,               # Avoid repeating 2-grams
            )

            # Decode the generated text
            generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
            # Return only the newly generated part (optional, can be tricky)
            # For simplicity, we'll return the whole thing for now.
            # To return only new text: return generated_text[len(prompt_text):].strip()
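            # A more robust variant (sketch, assuming the tokenization above): slice off
            # the prompt tokens before decoding, so you don't rely on character offsets.
            # new_token_ids = outputs[0][inputs["input_ids"].shape[1]:]
            # return tokenizer.decode(new_token_ids, skip_special_tokens=True).strip()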
            return generated_text

        except Exception as e:
            print(f"Error during generation: {e}")
            return f"Error during text generation: {e}"

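    # Optional local sanity check (illustrative prompt, not part of the demo flow):
    # uncomment to exercise the function directly before wiring it into Gradio.
    # print(generate_text_from_llm("Once upon a time"))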
# --- 3. Create the Gradio Interface ---
demo = gr.Interface(
    fn=generate_text_from_llm,
    inputs=[
        gr.Textbox(
            label="Enter your prompt",
            placeholder="Start typing here...",
            lines=5
        )
    ],
    outputs=[
        gr.Textbox(label="LLM Generated Text", lines=10)
    ],
    title="📝 Simple LLM Text Generator",
    description="Enter a prompt and a small LLM (DistilGPT2) will try to continue it. This is a basic demo for learning purposes.",
    examples=[
        ["Once upon a time, in a land far away,"],
        ["The best way to learn programming is"],
        ["Artificial intelligence is rapidly changing the world by"]
    ],
    theme=gr.themes.Soft() # You can try other themes like gr.themes.Default()
)

# --- 4. Launch the app ---
# When deployed to Hugging Face Spaces, this script is executed and launch() starts the app.
# share=True is ignored there; it only matters for local testing, where it creates a temporary public link.
if __name__ == "__main__":
    if model_loaded:
        demo.launch(debug=True, share=True) # share=True creates a temporary public link
    else:
        print("Model failed to load. Gradio app will run with an error message function.")
        # Launch with the dummy function so the UI still appears
        demo_error = gr.Interface(fn=lambda x: "Error: Model could not be loaded.", inputs="textbox", outputs="textbox", title="LLM Demo - MODEL LOAD ERROR")
        demo_error.launch(debug=True, share=True)
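# Deployment note (assumption about the surrounding Space, not shown in this file):
# a requirements.txt next to app.py listing transformers and torch is expected,
# since Hugging Face Spaces installs dependencies from it before running this script.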