| """ | |
| Colab Node Installer | |
| One-liner: !wget -q https://huggingface.co/spaces/ndwdgda/agent-dashboard/resolve/main/install_node.py && python install_node.py | |
| """ | |
| import os | |
| import subprocess | |
| import sys | |
| import time | |
| print("="*60) | |
| print("π FREE SUPERCOMPUTER NODE SETUP") | |
| print("="*60) | |
# 1. Install dependencies
print("\nInstalling dependencies...")
packages = [
    "fastapi", "nest-asyncio", "pyngrok", "uvicorn",
    "transformers", "accelerate", "bitsandbytes", "torch",
]
subprocess.run([sys.executable, "-m", "pip", "install", "-q"] + packages, check=True)
print("Dependencies installed")
# 2. Set up ngrok: read a token from a file if one is present,
# otherwise fall back to the bundled default
token_file = "/content/ngrok_token.txt"
if os.path.exists(token_file):
    with open(token_file) as f:
        token = f.read().strip()
    print("Found ngrok token")
else:
    # Bundled shared token; replace with your own from the ngrok dashboard
    token = "37qBoBViEaGHxxpXeAWfljhM2XA_4xv22ydkx6SN3jdPw7RwL"
    print("Using default ngrok token")

from pyngrok import ngrok
ngrok.set_auth_token(token)
# 3. Create server code
server_code = '''
from fastapi import FastAPI
from pydantic import BaseModel
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
import uvicorn
import nest_asyncio
from pyngrok import ngrok
import os

app = FastAPI()

# Model
MODEL_NAME = "meta-llama/Llama-3.2-1B"
print(f"Loading {MODEL_NAME}...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
if tokenizer.pad_token is None:
    tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    torch_dtype=torch.float16,
    device_map="auto",
)
print("Model loaded")
class Req(BaseModel):
    prompt: str
    max_tokens: int = 200

@app.post("/generate")
def generate(req: Req):
    inputs = tokenizer(req.prompt, return_tensors="pt").to(model.device)
    with torch.no_grad():
        out = model.generate(**inputs, max_new_tokens=req.max_tokens)
    return {"text": tokenizer.decode(out[0], skip_special_tokens=True)}

@app.get("/")
def root():
    return {"status": "ok", "gpu": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "cpu"}
# Free port 8000 if a previous server instance is still holding it
os.system("fuser -k 8000/tcp")

# Start the ngrok tunnel, then the API server
nest_asyncio.apply()
public_url = ngrok.connect(8000).public_url
print(f"\\nPUBLIC URL: {public_url}\\n")
with open("/content/node_url.txt", "w") as f:
    f.write(public_url)
uvicorn.run(app, host="0.0.0.0", port=8000)
'''
with open("server.py", "w") as f:
    f.write(server_code)

# 4. Run server
print("\nStarting node server...")
print("Check the output for the PUBLIC URL")
os.system(f"{sys.executable} server.py")
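
# --- Client sketch (commented out; assumes the `requests` package and the
# URL that server.py saves to /content/node_url.txt -- from another machine,
# paste the printed PUBLIC URL instead):
#   import requests
#   url = open("/content/node_url.txt").read().strip()
#   resp = requests.post(f"{url}/generate",
#                        json={"prompt": "Hello", "max_tokens": 50})
#   print(resp.json()["text"])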