Goated121 committed on
Commit
ccff6ac
·
verified ·
1 Parent(s): 5e5eab9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +28 -5
app.py CHANGED
# -----------------------------
# Load Qwen model (CPU only, no accelerate)
# -----------------------------

import os

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline

# Read the Hugging Face access token from the environment (Space Secrets).
# Add "HF_TOKEN" under Space Settings > Variables and Secrets. os.getenv
# returns None when unset, and transformers then falls back to anonymous
# access — fine for public repos like the one below.
hf_token = os.getenv("HF_TOKEN")

# Confirmed Hub repo id. (The earlier "Qwen/Qwen3.5-0.8B-Instruct" spelling
# does not match a repo on the Hub; the Qwen2.5 small-instruct models do.)
model_name = "Qwen/Qwen2.5-0.5B-Instruct"

# Load tokenizer, authenticating when a token is available.
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    token=hf_token,
)

# Load the model on CPU in full precision.
# NOTE: do NOT pass device_map= here (even device_map="cpu") — that code
# path requires the `accelerate` package, which this section explicitly
# avoids. Plain from_pretrained already places the model on CPU by default.
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    token=hf_token,
    torch_dtype=torch.float32,  # fp32: CPUs generally lack fast fp16 kernels
)

# Text-generation pipeline. device=-1 pins inference to CPU without
# needing accelerate's device_map machinery.
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=150,
    do_sample=True,
    temperature=0.6,
    device=-1,
)

# Usage example:
# result = generator("How do I run a Flutter project?")
# print(result[0]['generated_text'])

print("LLM loaded successfully!")

# -----------------------------