import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
import torch
import os
import traceback

# --- Configuration ---
# Running a VERY SMALL model LOCALLY on the CPU to avoid API errors.
# EXPECT SLOW PERFORMANCE AND LOWER QUALITY OUTPUT.
MODEL_NAME = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

# Max new tokens for generation
MAX_NEW_TOKENS = 512  # Keep lower for faster generation on CPU

# Default System Prompt - guides the AI's behavior
DEFAULT_SYSTEM_PROMPT = "You are a helpful AI assistant specialized in writing basic Luau code for the Roblox platform. Format Luau code blocks clearly using ```lua ... ```."

# --- Model and Tokenizer Loading (Local) ---
print(f"Attempting to load model LOCALLY: {MODEL_NAME}")
print("This may take some time and use significant RAM...")

model = None
pipe = None
tokenizer = None

try:
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    print("Tokenizer loaded.")

    # Load model
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        torch_dtype=torch.float32,  # Use float32 for CPU compatibility
        # device_map="auto" may pick a GPU if one is detected; force CPU with
        # device_map="cpu" if "auto" causes issues on the basic tier.
        device_map="auto",
        # trust_remote_code=True   # Not typically needed for TinyLlama, but add if errors occur
        # low_cpu_mem_usage=True   # Can help when loading larger models
    )
    print("Model loaded successfully.")

    # --- Create the Text Generation Pipeline (Local) ---
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=MAX_NEW_TOKENS,
        temperature=0.7,  # Standard temperature
        top_p=0.9,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id if tokenizer else 50256,  # Default EOS id for many models
    )
    print("Text generation pipeline created.")

except Exception as e:
    print("-----------------------------------------------------")
    print("ERROR during local model loading or pipeline creation.")
    print(f"Error Type: {type(e).__name__}")
    print(f"Error Details: {e}")
    print("Possible Causes:")
    print("  1. Model is too large for available RAM (16GB limit).")
    print("  2. Missing dependencies (check requirements.txt).")
    print("  3. Issues downloading model files.")
    print("Full Traceback:")
    print(traceback.format_exc())
    print("-----------------------------------------------------")
    # pipe stays None so the chat function below returns an error message

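# --- Optional: prompt assembly via the tokenizer's chat template ---
# Minimal alternative sketch to the manual string assembly in luau_coder_local()
# below. It assumes a transformers version with chat-template support (~4.34+)
# and that the checkpoint ships a template (the TinyLlama chat checkpoint does).
# Not wired into the app by default.
def build_prompt_with_template(tok, message, history, system_prompt=DEFAULT_SYSTEM_PROMPT):
    # Convert Gradio-style history ([[user, assistant], ...]) into role dicts
    messages = [{"role": "system", "content": system_prompt}]
    for user_msg, assistant_msg in history:
        messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})
    # Returns a single prompt string ending with the assistant turn marker
    return tok.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
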
# --- Chatbot Logic Function (Local Execution) ---
def luau_coder_local(message: str, history: list[list[str]], system_prompt: str = DEFAULT_SYSTEM_PROMPT):
    # Check if the model/pipeline failed to load
    if pipe is None or tokenizer is None:
        return "Error: AI model could not be loaded locally. Check Space logs for details (likely memory limit or dependency issue)."

    # --- Assemble the Prompt for TinyLlama Chat (Zephyr-style chat format) ---
    # Format: <|system|>\nPROMPT\n<|user|>\nPROMPT\n<|assistant|>\nRESPONSE
    prompt_list = []
    current_system_prompt = system_prompt or DEFAULT_SYSTEM_PROMPT

    # Add system prompt first
    if current_system_prompt:
        prompt_list.append(f"<|system|>\n{current_system_prompt}")

    # Add history
    for user_msg, assistant_msg in history:
        prompt_list.append(f"<|user|>\n{user_msg}")
        if assistant_msg:  # Ensure assistant message exists
            prompt_list.append(f"<|assistant|>\n{assistant_msg}")

    # Add the current user message
    prompt_list.append(f"<|user|>\n{message}")

    # Add the final model turn marker, signaling the model to start generating
    prompt_list.append("<|assistant|>")  # Model generates after this

    # Combine the list into a single string prompt
    prompt = "\n".join(prompt_list)
    print(f"\n--- Generating Response Locally (prompt length: {len(prompt)}) ---")
    # print(f"Prompt Content:\n{prompt}")  # Uncomment for debugging

    try:
        # Generate response using the local pipeline
        # NOTE: This will be SLOW on CPU
        sequences = pipe(prompt)
        generated_text = sequences[0]['generated_text']
        print(f"Raw generated text received (length: {len(generated_text)})")

        # Extract the response part after the final marker
        response_start_index = generated_text.rfind('<|assistant|>')
        if response_start_index != -1:
            response = generated_text[response_start_index + len('<|assistant|>'):].strip()
        else:
            # Fallback if the marker wasn't found (shouldn't happen often)
            print("Warning: Could not find '<|assistant|>' marker in response.")
            response = generated_text  # Use the whole text; may include the prompt

        # Clean up the response text - remove the EOS token
        response_cleaned = response.replace(tokenizer.eos_token, "").strip()

        # Try to format as a Lua code block if needed
        if "```lua" not in response_cleaned and "```" in response_cleaned:
            response_formatted = response_cleaned.replace("```", "```lua", 1)
        elif "```" not in response_cleaned:
            # Basic check if it looks like code, then wrap it
            if any(kw in response_cleaned for kw in ["function", "local ", "game.", "script.", "Instance.new", "Vector3.new", ":Connect", "wait("]):
                response_formatted = f"```lua\n{response_cleaned}\n```"
            else:
                response_formatted = response_cleaned  # Assume plain text
        else:
            response_formatted = response_cleaned  # Assume already formatted correctly

        # Handle cases where the response might be empty after cleaning
        if not response_cleaned:
            # Return the raw generated text if cleaning left nothing
            # (maybe the EOS token was the only thing generated)
            if generated_text and generated_text != prompt:
                return generated_text.replace(tokenizer.eos_token, "").strip()
            else:
                return "Sorry, the AI generated an empty response."

        return response_formatted

    # Catch potential errors during generation
    except Exception as e:
        error_type = type(e).__name__
        print(f"Error during local generation ({error_type}): {e}")
        print(traceback.format_exc())  # Log the full error for debugging
        return f"Sorry, an unexpected error occurred during local generation ({error_type})."

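# Quick local smoke test of the chat function without the UI. The SMOKE_TEST
# environment variable is a hypothetical switch added here for illustration;
# normal Space startup is unaffected unless it is set to "1".
if os.environ.get("SMOKE_TEST") == "1":
    print(luau_coder_local("Make a part print 'Hi!' when touched", []))
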
# --- Gradio Interface Setup ---
print("Setting up Gradio Interface...")

chatbot_interface = gr.ChatInterface(
    fn=luau_coder_local,  # Connects the chat input to our LOCAL function
    title="🐌 Luau Code Assistant (Roblox - Local CPU Version)",  # Updated title
    description=f"Ask me for basic Luau code. Responses will be SLOW. (Powered by {MODEL_NAME} running locally)",  # Updated description
    chatbot=gr.Chatbot(height=600, render_markdown=True, bubble_full_width=False),
    textbox=gr.Textbox(placeholder="e.g., 'Make a part print Hello'", container=False, scale=7),
    additional_inputs=[gr.Textbox(value=DEFAULT_SYSTEM_PROMPT, label="System Prompt")],
    examples=[
        ["Make a part print 'Hi!' when touched"],
        ["How to make a variable in Luau?"],
        ["Change part color to red"],
    ],  # Simplified examples for a smaller model
    theme="soft",
)
print("Gradio Interface configured.")

# --- Launch the Application ---
if __name__ == "__main__":
    print("Launching Gradio app...")
    # queue() is still important for local generation since it can take time
    chatbot_interface.queue().launch()
    print("Gradio app should be running now.")
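# A minimal requirements.txt for this Space would likely need (an assumption,
# not pinned by this script):
#   gradio
#   transformers
#   torch
#   accelerate  # required when passing device_map="auto" to from_pretrained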