arasaltan committed on
Commit fb13d75 · verified · 1 Parent(s): 2531a82

Update app.py

Files changed (1)
  1. app.py +7 -10
app.py CHANGED
@@ -10,20 +10,16 @@ LORA_PATH = "./"
 tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
 tokenizer.pad_token = tokenizer.eos_token
 
-# Base model (disk offload enabled)
-base_model = AutoModelForCausalLM.from_pretrained(
+# Base model (CPU)
+model = AutoModelForCausalLM.from_pretrained(
     BASE_MODEL,
     torch_dtype=torch.float32,
-    device_map="auto",
-    offload_folder="offload",
-    offload_state_dict=True,
+    device_map={"": "cpu"},
     low_cpu_mem_usage=True
 )
 
-base_model.config.use_cache = False
-
-# Load LoRA (ONLY ONCE)
-model = PeftModel.from_pretrained(base_model, LORA_PATH)
+# Load LoRA
+model = PeftModel.from_pretrained(model, LORA_PATH)
 model.eval()
 
 
@@ -45,7 +41,7 @@ Answer:
     output = model.generate(
         **inputs,
         max_new_tokens=int(max_tokens),
-        do_sample=False,
+        do_sample=False,  # fast on CPU
        eos_token_id=tokenizer.eos_token_id
     )
 
@@ -53,6 +49,7 @@ Answer:
     return tokenizer.decode(generated, skip_special_tokens=True)
 
 
+# Gradio UI
 demo = gr.Interface(
     fn=chat,
     inputs=[
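
For reference, here is a minimal sketch of how the loading and inference path in app.py could look after this commit. Only the loading and generation calls mirror the diff; the BASE_MODEL value, the prompt template, and the Gradio widgets are placeholders, since they are not visible in the changed lines.

import torch
import gradio as gr
from transformers import AutoTokenizer, AutoModelForCausalLM
from peft import PeftModel

BASE_MODEL = "some-org/some-base-model"  # placeholder; the real value is set earlier in app.py
LORA_PATH = "./"

tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
tokenizer.pad_token = tokenizer.eos_token

# Base model kept entirely on CPU (no disk offload), as in this commit
model = AutoModelForCausalLM.from_pretrained(
    BASE_MODEL,
    torch_dtype=torch.float32,
    device_map={"": "cpu"},
    low_cpu_mem_usage=True,
)

# Apply the LoRA adapter on top of the base weights
model = PeftModel.from_pretrained(model, LORA_PATH)
model.eval()

def chat(question, max_tokens):
    # Prompt format is assumed; the diff only shows an "Answer:" suffix
    prompt = f"Question: {question}\nAnswer:"
    inputs = tokenizer(prompt, return_tensors="pt")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=int(max_tokens),
            do_sample=False,  # greedy decoding, fast on CPU
            eos_token_id=tokenizer.eos_token_id,
        )
    # Strip the prompt tokens and decode only the newly generated part
    generated = output[0][inputs["input_ids"].shape[1]:]
    return tokenizer.decode(generated, skip_special_tokens=True)

# Gradio UI (widget choices are assumptions, not taken from the diff)
demo = gr.Interface(
    fn=chat,
    inputs=[gr.Textbox(label="Question"), gr.Slider(16, 256, value=128, label="Max tokens")],
    outputs=gr.Textbox(label="Answer"),
)

if __name__ == "__main__":
    demo.launch()

Pinning every module to the CPU with device_map={"": "cpu"} replaces the disk-offload path used in the previous revision, and greedy decoding (do_sample=False) keeps per-token latency predictable when running on CPU only.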