8-bit loading = low performance

#13
by dzmltzack - opened
Files changed (1) hide show
  1. app.py +1 -1
app.py CHANGED
@@ -8,7 +8,7 @@ import torch
8
  print(f"Is CUDA available: {torch.cuda.is_available()}")
9
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
10
 
11
- pipe_flan = pipeline("text2text-generation", model="philschmid/flan-t5-xxl-sharded-fp16", model_kwargs={"load_in_8bit":True, "device_map": "auto"})
12
  pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
13
 
14
  examples = [
 
8
  print(f"Is CUDA available: {torch.cuda.is_available()}")
9
  print(f"CUDA device: {torch.cuda.get_device_name(torch.cuda.current_device())}")
10
 
11
+ pipe_flan = pipeline("text2text-generation", model="google/flan-t5-xl", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
12
  pipe_vanilla = pipeline("text2text-generation", model="t5-large", device="cuda:0", model_kwargs={"torch_dtype":torch.bfloat16})
13
 
14
  examples = [