Goated121 committed on
Commit
28ffb6e
·
verified ·
1 Parent(s): d864053

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -4
app.py CHANGED
@@ -71,13 +71,11 @@ def retrieve_context(query):
71
  # Load Llama model (CPU only, no accelerate)
72
  # -----------------------------
73
  model_name = "meta-llama/Llama-3.2-1B"
74
-
75
  tokenizer = AutoTokenizer.from_pretrained(model_name)
76
  model = AutoModelForCausalLM.from_pretrained(
77
  model_name,
78
- torch_dtype=torch.float32 # CPU only
79
  )
80
-
81
  generator = pipeline(
82
  "text-generation",
83
  model=model,
@@ -85,7 +83,7 @@ generator = pipeline(
85
  max_new_tokens=150,
86
  do_sample=True,
87
  temperature=0.6,
88
- device=-1 # ensures CPU is used
89
  )
90
 
91
  print("LLM loaded successfully!")
 
71
  # Load Llama model (CPU only, no accelerate)
72
  # -----------------------------
73
  model_name = "meta-llama/Llama-3.2-1B"
 
74
  tokenizer = AutoTokenizer.from_pretrained(model_name)
75
  model = AutoModelForCausalLM.from_pretrained(
76
  model_name,
77
+ torch_dtype=torch.float32 # CPU-friendly
78
  )
 
79
  generator = pipeline(
80
  "text-generation",
81
  model=model,
 
83
  max_new_tokens=150,
84
  do_sample=True,
85
  temperature=0.6,
86
+ device=-1 # CPU
87
  )
88
 
89
  print("LLM loaded successfully!")