Spaces:

DataChem
/

custom-api

Paused

DataChem committed on Dec 29, 2024

Commit

a69755d

verified ·

1 Parent(s): 3895f1c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -6,6 +6,13 @@ import torch.nn.functional as F
 app = FastAPI()
 # -------------------------------------------------------------------------
 # Update this to the Llama 2 Chat model you prefer. This example uses the
 # 7B chat version. For larger models (13B, 70B), ensure you have enough RAM.
@@ -22,7 +29,7 @@ print(f"Loading model/tokenizer from: {model_name}")
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     trust_remote_code=True
-    # use_auth_token="YOUR_HF_TOKEN",  # If needed for private/gated model
 )
 # -------------------------------------------------------------------------
@@ -38,7 +45,7 @@ tokenizer = AutoTokenizer.from_pretrained(
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True
-    # use_auth_token="YOUR_HF_TOKEN",  # If needed
 )
 # Choose device based on availability

 app = FastAPI()
+# Read the Hugging Face access token from the HF_AUTH_TOKEN environment
+# variable. hf_token is None when the variable is unset; in that case only a
+# warning is printed and startup continues.
+hf_token = os.environ.get("HF_AUTH_TOKEN", None)
+if hf_token is None:
+    print("WARNING: No HF_AUTH_TOKEN found in environment. "
+          "Make sure to set a Hugging Face token if the model is gated.")
 # -------------------------------------------------------------------------
 # Update this to the Llama 2 Chat model you prefer. This example uses the
 # 7B chat version. For larger models (13B, 70B), ensure you have enough RAM.
 tokenizer = AutoTokenizer.from_pretrained(
     model_name,
     trust_remote_code=True
+    use_auth_token=hf_token,  # If needed for private/gated model
 )
 # -------------------------------------------------------------------------
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
     trust_remote_code=True
+    use_auth_token=hf_token,  # If needed
 )
 # Choose device based on availability