Spaces:

rootxhacker
/

CodeAstra-7B-demo

Runtime error

App Files Community

rootxhacker committed on Jul 3, 2024

Commit

bbcea92

verified ·

1 Parent(s): 5eafbb3

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -8

app.py CHANGED Viewed

@@ -4,31 +4,49 @@ from transformers import AutoModelForCausalLM, AutoTokenizer
 import gradio as gr
 import spaces
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
-device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-print(device)
 model = AutoModelForCausalLM.from_pretrained(
     config.base_model_name_or_path,
     return_dict=True,
     load_in_4bit=True,
-    device_map="auto"  # This will automatically handle device placement
-    )
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 model = PeftModel.from_pretrained(model, peft_model_id)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
-    inputs = tokenizer(query, return_tensors="pt").to(device)  # Move inputs to the same device as the model
-    outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
-    return tokenizer.decode(outputs[0], skip_special_tokens=True)
 @spaces.GPU(duration=200)
 def code_review(code_to_analyze):
-    query = f"As a code review expert, your role will be to carefully examine the code for potential security flaws and provide guidance on secure coding practices. This may include identifying common coding mistakes that could lead to vulnerabilities, suggesting ways to improve the code's overall security, and recommending tools or techniques that can be used to detect and prevent potential threats. Your expertise in security will be particularly valuable in ensuring that any code developed meets the highest security standard:\n{code_to_analyze}"
     result = get_completion(query, model, tokenizer)
     return result

 import gradio as gr
 import spaces
+# Ensure CUDA is available
+assert torch.cuda.is_available(), "CUDA is not available. Please check your GPU setup."
+# Set the device
+device = torch.device("cuda")
+torch.cuda.set_device(0)  # Use the first GPU if multiple are available
+# Load the model and tokenizer
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
+# Load the model on GPU
 model = AutoModelForCausalLM.from_pretrained(
     config.base_model_name_or_path,
     return_dict=True,
     load_in_4bit=True,
+    torch_dtype=torch.float16,
+    device_map="auto"
+)
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
+# Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
+model.to(device)
+# Ensure all model parameters are on CUDA
+for param in model.parameters():
+    param.data = param.data.to(device)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
+    try:
+        inputs = tokenizer(query, return_tensors="pt").to(device)
+        with torch.no_grad():
+            outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
+        return tokenizer.decode(outputs[0].cpu(), skip_special_tokens=True)
+    except Exception as e:
+        return f"An error occurred: {str(e)}"
 @spaces.GPU(duration=200)
 def code_review(code_to_analyze):
+    query = f"As a code review expert, examine the following code for potential security flaws and provide guidance on secure coding practices:\n{code_to_analyze}"
     result = get_completion(query, model, tokenizer)
     return result