Spaces:

rootxhacker
/

CodeAstra-7B-demo

Runtime error

rootxhacker committed on Jul 3

Commit

a93c076

•

1 Parent(s): 9d3ed05

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -7,17 +7,26 @@ import spaces
 # Load the model and tokenizer
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
-model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_4bit=True,device_map={"":0})
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
-    inputs = tokenizer(query, return_tensors="pt")
     outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)

 # Load the model and tokenizer
 peft_model_id = "rootxhacker/CodeAstra-7B"
 config = PeftConfig.from_pretrained(peft_model_id)
+# Determine the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+# Load the model on the appropriate device
+model = AutoModelForCausalLM.from_pretrained(
+    config.base_model_name_or_path,
+    return_dict=True,
+    load_in_4bit=True,
+    device_map="auto"  # This will automatically handle device placement
+)
 tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
 # Load the Lora model
 model = PeftModel.from_pretrained(model, peft_model_id)
 @spaces.GPU(duration=200)
 def get_completion(query, model, tokenizer):
+    inputs = tokenizer(query, return_tensors="pt").to(device)  # Move inputs to the same device as the model
     outputs = model.generate(**inputs, max_new_tokens=512, do_sample=True, temperature=0.7)
     return tokenizer.decode(outputs[0], skip_special_tokens=True)