Spaces:

Goated121
/

ChatBot

Sleeping

Goated121 committed 28 days ago

Commit

ccff6ac

verified ·

1 Parent(s): 5e5eab9

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -70,24 +70,47 @@ def retrieve_context(query):
 # -----------------------------
 # Load Qwen model (CPU only, no accelerate)
 # -----------------------------
-model_name = "Qwen/Qwen3.5-0.8B-Instruct"
-tokenizer = AutoTokenizer.from_pretrained(model_name)
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
-    torch_dtype=torch.float32  # CPU only
 )
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=150,
     do_sample=True,
-    temperature=0.6,
-    device=-1  # ensures CPU is used
 )
 print("LLM loaded successfully!")
 # -----------------------------

 # -----------------------------
 # Load Qwen model (CPU only; NOTE: the device_map argument passed below may require the accelerate package — verify)
 # -----------------------------
+import os
+import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+# 1. Access the token from Space Secrets
+# Make sure you've added "HF_TOKEN" in your Space Settings > Variables and Secrets
+hf_token = os.getenv("HF_TOKEN")
+# 2. Use a confirmed model path (Qwen2.5-1.5B or Qwen2.5-0.5B are highly reliable)
+# If you are certain about 3.5, ensure the spelling matches the HF Repo exactly.
+model_name = "Qwen/Qwen2.5-0.5B-Instruct"
+# 3. Load Tokenizer with authentication
+tokenizer = AutoTokenizer.from_pretrained(
+    model_name,
+    token=hf_token
+)
+# 4. Load Model with authentication
 model = AutoModelForCausalLM.from_pretrained(
     model_name,
+    token=hf_token,
+    torch_dtype=torch.float32,  # Optimized for CPU
+    device_map="cpu"            # Explicitly force CPU
 )
+# 5. Setup Pipeline
 generator = pipeline(
     "text-generation",
     model=model,
     tokenizer=tokenizer,
     max_new_tokens=150,
     do_sample=True,
+    temperature=0.6
 )
+# Usage Example:
+# result = generator("How do I run a Flutter project?")
+# print(result[0]['generated_text'])
 print("LLM loaded successfully!")
 # -----------------------------