Speed up the inference a little bit

#1
by multimodalart HF staff - opened
Files changed (1) hide show
  1. handler.py +4 -2
handler.py CHANGED
@@ -23,7 +23,8 @@ class EndpointHandler:
         ).to(device)

         self.pipe.load_lora_weights("SvenN/sdxl-emoji", weight_name="lora.safetensors")
-
+        self.pipe.fuse_lora()
+
         text_encoders = [self.pipe.text_encoder, self.pipe.text_encoder_2]
         tokenizers = [self.pipe.tokenizer, self.pipe.tokenizer_2]

@@ -44,10 +45,11 @@ class EndpointHandler:
         inputs = data.pop("inputs", data)

         # Automatically add trigger tokens to the beginning of the prompt
-        full_prompt = f"A <s0><s1> {inputs}"
+        full_prompt = f"A <s0><s1> emoji {inputs}"
         images = self.pipe(
             full_prompt,
             cross_attention_kwargs={"scale": 0.8},
+            num_inference_steps=25
         ).images
         image = images[0]