Hugging Face Spaces note: this Space was showing "Runtime error" at startup; the script below is the app being debugged.
import sys

import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# BUG FIX: from_pretrained() expects a Hub repo id ("user/repo"), not a full
# https:// URL. The URL form is treated as a (nonexistent) local path and the
# load raises at startup — the likely cause of the Space's "Runtime error".
model_id = "imsuprtwo2/NanoBit-300M"

print("Starting MASA Boot Sequence...")
sys.stdout.flush()  # Forces the logs to actually show up

tokenizer = AutoTokenizer.from_pretrained(model_id)

# The "Low RAM" loader is mandatory for a 1.4GB file on a free Space.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    low_cpu_mem_usage=True,
    torch_dtype=torch.float32,
    trust_remote_code=True,
)
| def chat(message, history): | |
| inputs = tokenizer(message, return_tensors="pt") | |
| with torch.no_grad(): | |
| outputs = model.generate(**inputs, max_new_tokens=50) | |
| return tokenizer.decode(outputs[0], skip_special_tokens=True).replace(message, "").strip() | |
# Spaces routes external traffic to 0.0.0.0:7860, so the server must bind
# there explicitly or Hugging Face never sees the app.
demo = gr.ChatInterface(fn=chat)
demo.launch(server_name="0.0.0.0", server_port=7860)