from flask import Flask, request, jsonify
from transformers import AutoTokenizer, AutoModelForCausalLM
from huggingface_hub import login
import torch
import os

app = Flask(__name__)

# ✅ Securely fetch the HF token from the environment (never exposed to users)
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    raise ValueError("HF_TOKEN is not set in environment variables!")

# 🔐 Authenticate with the Hugging Face Hub
login(token=hf_token)

# 🔄 Load model from Hugging Face
model_id = "Salesforce/codegen2-1B"
print("🔄 Loading model...")

# ⚠️ Fast tokenizer disabled to avoid a tokenizer.json crash with this model
tokenizer = AutoTokenizer.from_pretrained(model_id, token=hf_token, use_fast=False)
model = AutoModelForCausalLM.from_pretrained(model_id, token=hf_token)

# Run on GPU if one is available, otherwise fall back to CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)
print("✅ Model loaded!")


@app.route('/chat', methods=['POST'])
def chat():
    try:
        # Tolerate missing or non-JSON bodies instead of raising
        data = request.get_json(silent=True) or {}
        msg = data.get("message", "")
        if not msg:
            return jsonify({"error": "No message sent"}), 400

        # Frame the input as a two-turn dialogue so the model completes Dex's reply
        prompt = f"User: {msg}\nDex:"
        inputs = tokenizer(prompt, return_tensors="pt").to(device)

        # Pass the attention mask along with input_ids so generate() masks correctly
        outputs = model.generate(
            **inputs,
            max_length=256,
            do_sample=True,
            top_k=50,
            top_p=0.95,
            temperature=0.7,
            pad_token_id=tokenizer.eos_token_id,
        )
        text = tokenizer.decode(outputs[0], skip_special_tokens=True)

        # Keep only the text after the final "Dex:" marker
        reply = text.split("Dex:")[-1].strip()
        return jsonify({"reply": reply})
    except Exception as e:
        return jsonify({"error": str(e)}), 500


if __name__ == "__main__":
    app.run(host='0.0.0.0', port=7860)
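
# Usage sketch (an assumption for illustration: the server is running locally
# on port 7860, as configured above — adjust host/port for your deployment):
#
#   curl -X POST http://localhost:7860/chat \
#        -H "Content-Type: application/json" \
#        -d '{"message": "Write a function that reverses a string"}'
#
# A successful response is JSON of the shape {"reply": "..."}; a missing
# "message" field returns {"error": "No message sent"} with HTTP 400.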