rootxhacker committed on
Commit ccad9ef
1 Parent(s): 112e6b8

Update app.py

Files changed (1)
  1. app.py +29 -4
app.py CHANGED
@@ -8,12 +8,24 @@ import spaces
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)

-# Load the model without explicit device mapping
+# Function to move tensors to CPU
+def to_cpu(obj):
+    if isinstance(obj, torch.Tensor):
+        return obj.cpu()
+    elif isinstance(obj, list):
+        return [to_cpu(item) for item in obj]
+    elif isinstance(obj, tuple):
+        return tuple(to_cpu(item) for item in obj)
+    elif isinstance(obj, dict):
+        return {key: to_cpu(value) for key, value in obj.items()}
+    return obj
+
+# Load the model
 model = AutoModelForCausalLM.from_pretrained(
     config.base_model_name_or_path,
     return_dict=True,
     load_in_4bit=True,
-    device_map=None # Let the Spaces environment handle device mapping
+    device_map='auto'
 )

 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
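
As an aside (not part of the commit), a minimal sketch of what the new to_cpu helper does with nested containers. It assumes the to_cpu definition from the hunk above is in scope; the payload is illustrative:

import torch

# Hypothetical stand-in for model outputs: tensors nested inside dict/list/tuple.
payload = {"ids": torch.ones(2, 3), "scores": [torch.zeros(2), (torch.ones(1),)]}
moved = to_cpu(payload)          # recurses through containers, calls .cpu() on tensors
print(moved["ids"].device.type)  # -> "cpu"

Since model.generate() returns a plain tensor, only the tensor branch is exercised by this commit; the container branches make the helper safe for richer return types.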
@@ -24,12 +36,25 @@ model = PeftModel.from_pretrained(model, peft_model_id)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
     try:
-        inputs = tokenizer(query, return_tensors="pt")
+        # Move model to CUDA
+        model = model.cuda()
+
+        # Ensure input is on CUDA
+        inputs = tokenizer(query, return_tensors="pt").to('cuda')
+
         with torch.no_grad():
             outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
+
+        # Move outputs to CPU before decoding
+        outputs = to_cpu(outputs)
+
         return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"
+    finally:
+        # Move model back to CPU to free up GPU memory
+        model = model.cpu()
+        torch.cuda.empty_cache()

 @spaces.GPU(duration=200)
 def code_review(code_to_analyze):
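
The reworked get_completion follows the usual ZeroGPU shape: @spaces.GPU(duration=200) attaches a GPU only while the decorated call runs, so the model and inputs are moved to CUDA on entry and the finally block pushes the model back to CPU and empties the cache on exit. A hedged usage sketch, with an invented prompt; model and tokenizer are the globals created earlier in app.py:

# Illustrative call only; in the app this is driven through code_review and Gradio.
query = "Review the following function for vulnerabilities:\ndef login(user, pwd): ..."
print(get_completion(query, model, tokenizer))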
@@ -47,4 +72,4 @@ iface = gr.Interface(
 )

 # Launch the Gradio app
-iface.launch()
+iface.launch()
 
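
The diff elides the gr.Interface(...) arguments (only the closing parenthesis appears as context), so the following is a hedged sketch of how such an interface is typically wired; the components, labels, and title are assumptions, not the app's actual code:

import gradio as gr

# Hypothetical wiring; the real gr.Interface(...) arguments are not shown in this diff.
iface = gr.Interface(
    fn=code_review,                            # the @spaces.GPU function defined above
    inputs=gr.Textbox(lines=20, label="Code to analyze"),
    outputs=gr.Textbox(label="Review"),
    title="CodeAstra-7B Code Review",          # illustrative title
)

# Launch the Gradio app
iface.launch()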