Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -6,6 +6,13 @@ import torch.nn.functional as F
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
9 |
# -------------------------------------------------------------------------
|
10 |
# Update this to the Llama 2 Chat model you prefer. This example uses the
|
11 |
# 7B chat version. For larger models (13B, 70B), ensure you have enough RAM.
|
@@ -22,7 +29,7 @@ print(f"Loading model/tokenizer from: {model_name}")
|
|
22 |
tokenizer = AutoTokenizer.from_pretrained(
|
23 |
model_name,
|
24 |
trust_remote_code=True
|
25 |
-
|
26 |
)
|
27 |
|
28 |
# -------------------------------------------------------------------------
|
@@ -38,7 +45,7 @@ tokenizer = AutoTokenizer.from_pretrained(
|
|
38 |
model = AutoModelForCausalLM.from_pretrained(
|
39 |
model_name,
|
40 |
trust_remote_code=True
|
41 |
-
|
42 |
)
|
43 |
|
44 |
# Choose device based on availability
|
|
|
6 |
|
7 |
app = FastAPI()
|
8 |
|
9 |
+
# Retrieve the token from environment variable
|
10 |
+
hf_token = os.environ.get("HF_AUTH_TOKEN", None)
|
11 |
+
if hf_token is None:
|
12 |
+
print("WARNING: No HF_AUTH_TOKEN found in environment. "
|
13 |
+
"Make sure to set a Hugging Face token if the model is gated.")
|
14 |
+
|
15 |
+
|
16 |
# -------------------------------------------------------------------------
|
17 |
# Update this to the Llama 2 Chat model you prefer. This example uses the
|
18 |
# 7B chat version. For larger models (13B, 70B), ensure you have enough RAM.
|
|
|
29 |
tokenizer = AutoTokenizer.from_pretrained(
|
30 |
model_name,
|
31 |
trust_remote_code=True,
|
32 |
+
use_auth_token=hf_token, # If needed for private/gated model
|
33 |
)
|
34 |
|
35 |
# -------------------------------------------------------------------------
|
|
|
45 |
model = AutoModelForCausalLM.from_pretrained(
|
46 |
model_name,
|
47 |
trust_remote_code=True,
|
48 |
+
use_auth_token=hf_token, # If needed
|
49 |
)
|
50 |
|
51 |
# Choose device based on availability
|