masanorihirano committed on
Commit
bed8c52
1 Parent(s): c693f6c

added compress

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -17,6 +17,7 @@ from fastchat.model.model_adapter import BaseAdapter
17
  from fastchat.model.model_adapter import load_model
18
  from fastchat.model.model_adapter import model_adapters
19
  from fastchat.serve.cli import SimpleChatIO
 
20
  from fastchat.serve.inference import generate_stream
21
  from huggingface_hub import Repository
22
  from huggingface_hub import snapshot_download
@@ -70,6 +71,8 @@ def load_lora_model(
70
  cpu_offloading=cpu_offloading,
71
  debug=debug,
72
  )
 
 
73
  if lora_weight is not None:
74
  # model = PeftModelForCausalLM.from_pretrained(model, model_path, **kwargs)
75
  config = LoraConfig.from_pretrained(lora_weight)
 
17
  from fastchat.model.model_adapter import load_model
18
  from fastchat.model.model_adapter import model_adapters
19
  from fastchat.serve.cli import SimpleChatIO
20
+ from fastchat.serve.inference import compress_module
21
  from fastchat.serve.inference import generate_stream
22
  from huggingface_hub import Repository
23
  from huggingface_hub import snapshot_download
 
71
  cpu_offloading=cpu_offloading,
72
  debug=debug,
73
  )
74
+ if load_8bit:
75
+ compress_module(model)
76
  if lora_weight is not None:
77
  # model = PeftModelForCausalLM.from_pretrained(model, model_path, **kwargs)
78
  config = LoraConfig.from_pretrained(lora_weight)