rphrp1985 committed on
Commit
2d9088a
1 Parent(s): e4cf8be

Update app.py

Files changed (1)
  1. app.py +2 -2
app.py CHANGED
@@ -60,7 +60,7 @@ model = AutoModelForCausalLM.from_pretrained(model_id, token= token,
 
 # Load the model with the inferred device map
 # model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
-model.half()
+# model.half()
 
 
 
@@ -73,7 +73,7 @@ def respond(
     temperature,
     top_p,
 ):
-
+    model= model.to('cuda')
     messages = [{"role": "user", "content": "Hello, how are you?"}]
     input_ids = tokenizer.apply_chat_template(messages, tokenize=True, add_generation_prompt=True, return_tensors="pt").to('cuda')
     ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
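
For context, a minimal, hypothetical sketch of how the touched section of app.py reads after this commit. Only the lines visible in the diff above come from the commit; the model id, the access token, the remaining from_pretrained keyword arguments, the respond() parameters before temperature, and the generation/decoding code are assumptions added so the sketch is self-contained.

# Hypothetical reconstruction -- only the diffed lines above are taken from the commit.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "..."   # actual repo id not shown in this diff
token = "..."      # Hugging Face access token; the real app passes a `token` variable

tokenizer = AutoTokenizer.from_pretrained(model_id, token=token)
# The from_pretrained call is truncated in the diff; any further kwargs are omitted here.
model = AutoModelForCausalLM.from_pretrained(model_id, token=token)

# Load the model with the inferred device map
# model = load_checkpoint_and_dispatch(model, model_id, device_map=device_map, no_split_module_classes=["GPTJBlock"])
# model.half()   # this commit comments out the fp16 cast at load time


def respond(message, history, system_message, max_tokens, temperature, top_p):
    # This commit adds `model = model.to('cuda')` at this point; a separate name is
    # used in the sketch to avoid shadowing the module-level variable.
    gpu_model = model.to("cuda")
    messages = [{"role": "user", "content": "Hello, how are you?"}]
    input_ids = tokenizer.apply_chat_template(
        messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
    ).to("cuda")
    ## <BOS_TOKEN><|START_OF_TURN_TOKEN|><|USER_TOKEN|>Hello, how are you?<|END_OF_TURN_TOKEN|><|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>
    output_ids = gpu_model.generate(   # generation call assumed, not shown in the diff
        input_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
    )
    return tokenizer.decode(output_ids[0][input_ids.shape[-1]:], skip_special_tokens=True)

The net effect of the two changed lines appears to be a shift from casting the whole model to half precision at load time toward moving the weights onto the GPU only inside the request handler, a pattern commonly seen in Spaces where the GPU is attached per request rather than at startup.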