DataChem committed on
Commit
a69755d
·
verified ·
1 Parent(s): 3895f1c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +9 -2
app.py CHANGED
@@ -6,6 +6,13 @@ import torch.nn.functional as F
6
 
7
  app = FastAPI()
8
 
 
 
 
 
 
 
 
9
  # -------------------------------------------------------------------------
10
  # Update this to the Llama 2 Chat model you prefer. This example uses the
11
  # 7B chat version. For larger models (13B, 70B), ensure you have enough RAM.
@@ -22,7 +29,7 @@ print(f"Loading model/tokenizer from: {model_name}")
22
  tokenizer = AutoTokenizer.from_pretrained(
23
  model_name,
24
  trust_remote_code=True
25
- # use_auth_token="YOUR_HF_TOKEN", # If needed for private/gated model
26
  )
27
 
28
  # -------------------------------------------------------------------------
@@ -38,7 +45,7 @@ tokenizer = AutoTokenizer.from_pretrained(
38
  model = AutoModelForCausalLM.from_pretrained(
39
  model_name,
40
  trust_remote_code=True
41
- # use_auth_token="YOUR_HF_TOKEN", # If needed
42
  )
43
 
44
  # Choose device based on availability
 
6
 
7
  app = FastAPI()
8
 
9
+ # Retrieve the token from environment variable
10
+ hf_token = os.environ.get("HF_AUTH_TOKEN", None)
11
+ if hf_token is None:
12
+ print("WARNING: No HF_AUTH_TOKEN found in environment. "
13
+ "Make sure to set a Hugging Face token if the model is gated.")
14
+
15
+
16
  # -------------------------------------------------------------------------
17
  # Update this to the Llama 2 Chat model you prefer. This example uses the
18
  # 7B chat version. For larger models (13B, 70B), ensure you have enough RAM.
 
29
  tokenizer = AutoTokenizer.from_pretrained(
30
  model_name,
31
  trust_remote_code=True
32
+ use_auth_token=hf_token, # If needed for private/gated model
33
  )
34
 
35
  # -------------------------------------------------------------------------
 
45
  model = AutoModelForCausalLM.from_pretrained(
46
  model_name,
47
  trust_remote_code=True
48
+ use_auth_token=hf_token, # If needed
49
  )
50
 
51
  # Choose device based on availability