rootxhacker committed
Commit 112e6b8
1 Parent(s): bbcea92

Update app.py

Files changed (1)
  1. app.py +6 -19
app.py CHANGED
@@ -4,43 +4,30 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import spaces
 
-# Ensure CUDA is available
-assert torch.cuda.is_available(), "CUDA is not available. Please check your GPU setup."
-
-# Set the device
-device = torch.device("cuda")
-torch.cuda.set_device(0)  # Use the first GPU if multiple are available
-
 # Load the model and tokenizer
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
 
-# Load the model on GPU
+# Load the model without explicit device mapping
 model = AutoModelForCausalLM.from_pretrained(
     config.base_model_name_or_path,
     return_dict=True,
     load_in_4bit=True,
-    torch_dtype=torch.float16,
-    device_map="auto"
+    device_map=None  # Let the Spaces environment handle device mapping
 )
 
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
-model.to(device)
-
-# Ensure all model parameters are on CUDA
-for param in model.parameters():
-    param.data = param.data.to(device)
 
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
     try:
-        inputs = tokenizer(query, return_tensors="pt").to(device)
+        inputs = tokenizer(query, return_tensors="pt")
         with torch.no_grad():
             outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
-        return tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
+        return tokenizer.decode(outputs[0], skip_special_tokens=True)
     except Exception as e:
         return f"An error occurred: {str(e)}"
@@ -59,5 +46,5 @@ iface = gr.Interface(
     description="This tool analyzes code for potential security flaws and provides guidance on secure coding practices."
 )
 
-# Launch the Gradio app with a public link
-iface.launch()
+# Launch the Gradio app
+iface.launch()
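Note: ZeroGPU Spaces attach a GPU only while a function decorated with @spaces.GPU is running, so the module-level CUDA assert and .to(device) calls removed here would fail at startup. A minimal sketch of making input placement explicit inside the decorated function instead, using model.device (an assumption about where the quantized weights land) rather than a hard-coded torch.device("cuda"):

import torch
import spaces

@spaces.GPU(duration=200)
def get_completion(query, model, tokenizer):
    try:
        # CUDA is available inside the @spaces.GPU context; align the
        # inputs with the model instead of hard-coding a device (a sketch).
        inputs = tokenizer(query, return_tensors="pt").to(model.device)
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
        return tokenizer.decode(outputs[0], skip_special_tokens=True)
    except Exception as e:
        return f"An error occurred: {str(e)}"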
 
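Separately, recent transformers releases route 4-bit loading through BitsAndBytesConfig rather than the bare load_in_4bit flag kept in this commit. A hedged sketch of the equivalent load under that API (assumes bitsandbytes is installed; config comes from PeftConfig.from_pretrained as above):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# 4-bit quantized load via an explicit quantization config (a sketch);
# bnb_4bit_compute_dtype stands in for the removed torch_dtype=torch.float16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
)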