3ML_bot_RTL

Sleeping

nikravan committed on Jun 18

Commit

148c35f

•

1 Parent(s): 4aeaa92

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -39,10 +39,11 @@ quantization_config = BitsAndBytesConfig(
 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
     low_cpu_mem_usage=True,
     trust_remote_code=True,
-    return_dict=True,
      quantization_config=quantization_config
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)

 model = AutoModelForCausalLM.from_pretrained(
     MODEL_ID,
+    torch_dtype=inference_dtype,
+    device_map = "cuda:0",
     low_cpu_mem_usage=True,
     trust_remote_code=True,
      quantization_config=quantization_config
 )
 tokenizer = AutoTokenizer.from_pretrained(MODEL_ID, trust_remote_code=True)