John6666 committed
Commit afb84a9 (verified) · 1 Parent(s): be764de

Upload 4 files

Files changed (2):
  1. app.py +4 -2
  2. joycaption.py +38 -37
app.py CHANGED
@@ -4,7 +4,8 @@ from joycaption import stream_chat_mod, get_text_model, change_text_model, get_r
 
 JC_TITLE_MD = "<h1><center>JoyCaption Alpha Two Mod</center></h1>"
 JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-two](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two),
-[Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha). Thanks to [dominic1021](https://huggingface.co/dominic1021)"""
+[Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha).
+Thanks to [dominic1021](https://huggingface.co/dominic1021), [IceHibiki](https://huggingface.co/IceHibiki)."""
 
 css = """
 .info {text-align:center; !important}
@@ -65,6 +66,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
         jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
                               allow_custom_value=True, min_width=320, visible=False)
         jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
+        jc_lora = gr.Checkbox(label="Use Custom VLM", info="Llama 3 BF16 only", value=True)
         jc_text_model_button = gr.Button("Load Model", variant="secondary", visible=False)
         jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
     with gr.Row():
@@ -81,7 +83,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
 
     jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_length, jc_extra_options, jc_name_input, jc_custom_prompt,
                                                     jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_prompt, jc_output_caption])
-    jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
+    jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4, jc_lora], [jc_text_model], show_api=False)
    #jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
    #jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
    #jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
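Note: the new jc_lora checkbox is simply appended to the inputs of the jc_text_model.change event, so its boolean value arrives as the extra is_lora argument of change_text_model. A minimal sketch of this Gradio wiring pattern, with illustrative component names and a stand-in handler rather than the real one:

    import gradio as gr

    def on_model_change(model_name: str, use_client: bool, gguf_file: str, is_nf4: bool, is_lora: bool):
        # each component listed in `inputs` becomes one positional argument, in order
        print(model_name, use_client, gguf_file, is_nf4, is_lora)
        return gr.update(choices=[model_name])

    with gr.Blocks() as demo:
        model = gr.Dropdown(label="LLM Model", choices=[], allow_custom_value=True)
        client = gr.Checkbox(label="Use Inference Client", value=False)
        gguf = gr.Dropdown(label="GGUF Filename", choices=[], allow_custom_value=True)
        nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
        lora = gr.Checkbox(label="Use Custom VLM", value=True)
        # the dropdown update returned by the handler is routed back to `model`
        model.change(on_model_change, inputs=[model, client, gguf, nf4, lora], outputs=[model], show_api=False)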
 
joycaption.py CHANGED
@@ -9,7 +9,7 @@ else:
         return func(*args, **kwargs)
     return wrapper
 import gradio as gr
-from huggingface_hub import InferenceClient
+from huggingface_hub import InferenceClient, HfApi
 from torch import nn
 from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, LlavaForConditionalGeneration
 from pathlib import Path
@@ -18,11 +18,15 @@ import torch.amp.autocast_mode
 from PIL import Image
 import torchvision.transforms.functional as TVF
 import gc
-from peft import PeftConfig
+from peft import PeftModel
 from typing import Union
 
-import subprocess
-subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
+LOAD_IN_NF4 = True
+
+if os.environ.get("SPACES_ZERO_GPU") is not None:
+    import subprocess
+    LOAD_IN_NF4 = False # If true, Custom VLM LoRA doesn't work initially. The rest are fine.
+    subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
 
 BASE_DIR = Path(__file__).resolve().parent # Define the base directory
 device = "cuda" if torch.cuda.is_available() else "cpu"
@@ -38,7 +42,8 @@ llm_models = {
     "unsloth/Meta-Llama-3.1-8B-bnb-4bit": None,
     "DevQuasar/HermesNova-Llama-3.1-8B": None,
     "mergekit-community/L3.1-Boshima-b-FIX": None,
-    "meta-llama/Meta-Llama-3.1-8B": None, # gated
+    #"chuanli11/Llama-3.2-3B-Instruct-uncensored": None, # Error(s) in loading state_dict for ImageAdapter:\n\tsize mismatch for linear1.weight: copying a param with shape torch.Size([4096, 1152]) from checkpoint, the shape in current model is torch.Size([3072, 1152]).\n\tsize mismatch for linear1.bias: copying a param with shape torch.Size([4096]) from checkpoint,
+    "unsloth/Meta-Llama-3.1-8B-Instruct": None,
 }
 
 CLIP_PATH = "google/siglip-so400m-patch14-384"
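Note: the commented-out chuanli11/Llama-3.2-3B-Instruct-uncensored entry documents why 3B models fail here: the bundled ImageAdapter checkpoint was trained against a 4096-dim hidden state, while the 3B model reports 3072. A hedged pre-check one could run before adding a model (the 4096 constant is inferred from that error message, not taken from the repo):

    from transformers import AutoConfig

    ADAPTER_HIDDEN_SIZE = 4096  # hidden size the shipped image adapter weights expect

    def adapter_compatible(repo_id: str) -> bool:
        # the image adapter's linear1 maps CLIP features to the LLM hidden size,
        # so the candidate model must report the same hidden_size
        cfg = AutoConfig.from_pretrained(repo_id)
        return getattr(cfg, "hidden_size", None) == ADAPTER_HIDDEN_SIZE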
@@ -158,25 +163,26 @@ class ImageAdapter(nn.Module):
 # https://huggingface.co/docs/transformers/main/en/peft#enable-and-disable-adapters
 # https://huggingface.co/docs/transformers/main/quantization/bitsandbytes?bnb=4-bit
 # https://huggingface.co/lllyasviel/flux1-dev-bnb-nf4
+# https://github.com/huggingface/transformers/issues/28515
+# https://gist.github.com/ChrisHayduk/1a53463331f52dca205e55982baf9930
 tokenizer = None
 text_model_client = None
 text_model = None
 image_adapter = None
-peft_config = None
 pixtral_model = None
 pixtral_processor = None
-def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True):
-    global tokenizer, text_model, image_adapter, peft_config, pixtral_model, pixtral_processor, text_model_client, use_inference_client
+def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True, is_lora: bool=True):
+    global tokenizer, text_model, image_adapter, pixtral_model, pixtral_processor, text_model_client, use_inference_client
     try:
         tokenizer = None
         text_model_client = None
         text_model = None
         image_adapter = None
-        peft_config = None
         pixtral_model = None
         pixtral_processor = None
         torch.cuda.empty_cache()
         gc.collect()
+        lora_device = "auto"
 
         from transformers import BitsAndBytesConfig
         nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
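Note: the nf4_config assignment is truncated by the hunk boundary. For reference, a typical 4-bit NF4 setup with transformers' BitsAndBytesConfig looks like the sketch below; the compute dtype and double-quant flag are common choices, not necessarily the exact values used in this file:

    import torch
    from transformers import BitsAndBytesConfig

    nf4_config = BitsAndBytesConfig(
        load_in_4bit=True,                      # store weights in 4 bits
        bnb_4bit_quant_type="nf4",              # NormalFloat4 quantization
        bnb_4bit_compute_dtype=torch.bfloat16,  # dtype used for the actual matmuls
        bnb_4bit_use_double_quant=True,         # also quantize the quantization constants
    )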
@@ -202,23 +208,25 @@ def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None
             if device == "cpu":
                 text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
             elif is_nf4:
-                text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
+                text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
             else:
-                text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16).eval()
+                text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=lora_device, torch_dtype=torch.bfloat16).eval()
         else:
             if device == "cpu":
                 text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
             elif is_nf4:
                 text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
             else:
-                text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=device, torch_dtype=torch.bfloat16).eval()
+                text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=lora_device, torch_dtype=torch.bfloat16).eval()
 
-        if LORA_PATH.exists():
+        if is_lora and LORA_PATH.exists() and not is_nf4:
             print("Loading VLM's custom text model")
-            if is_nf4: peft_config = PeftConfig.from_pretrained(LORA_PATH, device_map=device, quantization_config=nf4_config)
-            else: peft_config = PeftConfig.from_pretrained(LORA_PATH, device_map=device)
-            text_model.add_adapter(peft_config)
-            text_model.enable_adapters()
+            if is_nf4: # omitted
+                text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device, quantization_config=nf4_config)
+            else:
+                text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device)
+            text_model = text_model.merge_and_unload(safe_merge=True) # to avoid PEFT bug https://github.com/huggingface/transformers/issues/28515
+        else: print("VLM's custom text model is not loaded")
 
         print("Loading image adapter")
         image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
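Note: instead of attaching a PeftConfig with add_adapter/enable_adapters, the base model is now wrapped with PeftModel.from_pretrained and immediately flattened with merge_and_unload, which folds the LoRA weights into the base model and returns a plain transformers model (the linked issue is the generation bug this avoids). A minimal hedged sketch of that pattern with placeholder paths:

    import torch
    from peft import PeftModel
    from transformers import AutoModelForCausalLM

    base = AutoModelForCausalLM.from_pretrained(
        "meta-llama/Meta-Llama-3.1-8B-Instruct",  # illustrative base model
        torch_dtype=torch.bfloat16,
        device_map="auto",
    )
    # wrap the base model with the LoRA adapter weights
    model = PeftModel.from_pretrained(base, "path/to/lora_adapter")
    # bake the adapter into the base weights and drop the PEFT wrapper;
    # safe_merge validates the merged weights (e.g. for NaNs) before applying them
    model = model.merge_and_unload(safe_merge=True)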
@@ -237,6 +245,7 @@ load_text_model.zerogpu = True
 print("Loading CLIP")
 clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
 clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
+assert (CHECKPOINT_PATH / "clip_model.pt").exists()
 if (CHECKPOINT_PATH / "clip_model.pt").exists():
     print("Loading VLM's custom vision model")
     checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=False)
@@ -251,15 +260,15 @@ clip_model.eval().requires_grad_(False).to(device)
 #load_text_model(PIXTRAL_PATHS[0])
 #print(f"pixtral_model: {type(pixtral_model)}") #
 #print(f"pixtral_processor: {type(pixtral_processor)}") #
-load_text_model()
-print(f"pixtral_model: {type(pixtral_model)}") #
-print(f"pixtral_processor: {type(pixtral_processor)}") #
+load_text_model(MODEL_PATH, None, LOAD_IN_NF4, True)
+#print(f"pixtral_model: {type(pixtral_model)}") #
+#print(f"pixtral_processor: {type(pixtral_processor)}") #
 
 @spaces.GPU()
 @torch.inference_mode()
 def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length: Union[str, int], extra_options: list[str], name_input: str, custom_prompt: str,
                     max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> tuple[str, str]:
-    global tokenizer, text_model, image_adapter, peft_config, pixtral_model, pixtral_processor, text_model_client, use_inference_client
+    global tokenizer, text_model, image_adapter, pixtral_model, pixtral_processor, text_model_client, use_inference_client
     torch.cuda.empty_cache()
     gc.collect()
 
@@ -302,7 +311,6 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length:
         print(f"pixtral_model: {type(pixtral_model)}") #
         print(f"pixtral_processor: {type(pixtral_processor)}") #
         input_images = [input_image.convert("RGB")]
-        #input_prompt = f"[INST]{prompt_str}\n[IMG][/INST]"
         input_prompt = "[INST]Caption this image:\n[IMG][/INST]"
         inputs = pixtral_processor(images=input_images, text=input_prompt, return_tensors="pt").to(device)
         generate_ids = pixtral_model.generate(**inputs, max_new_tokens=max_new_tokens)
@@ -373,7 +381,7 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length:
     attention_mask = torch.ones_like(input_ids)
 
     # Debugging
-    print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
+    #print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
 
     text_model.to(device)
     generate_ids = text_model.generate(input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
@@ -403,19 +411,16 @@ def is_repo_name(s):
 
 
 def is_repo_exists(repo_id):
-    from huggingface_hub import HfApi
     try:
         api = HfApi(token=HF_TOKEN)
         if api.repo_exists(repo_id=repo_id): return True
         else: return False
     except Exception as e:
-        print(f"Error: Failed to connect {repo_id}.")
-        print(e)
+        print(f"Error: Failed to connect {repo_id}. {e}")
         return True # for safe
 
 
 def is_valid_repo(repo_id):
-    from huggingface_hub import HfApi
     import re
     try:
         if not re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', repo_id): return False
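Note: the per-function `from huggingface_hub import HfApi` imports are gone because HfApi is now imported once at the top of the file, and the two separate prints are collapsed into a single f-string that includes the exception. A hedged sketch of the repo_exists pattern these helpers use (function and token names are illustrative):

    import os
    from huggingface_hub import HfApi

    def repo_is_reachable(repo_id: str) -> bool:
        # mirrors the file's behaviour: fall back to True ("for safe") on API errors
        try:
            api = HfApi(token=os.environ.get("HF_TOKEN"))
            return api.repo_exists(repo_id=repo_id)
        except Exception as e:
            print(f"Error: Failed to connect {repo_id}. {e}")
            return True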
@@ -432,15 +437,13 @@ def get_text_model():
 
 
 def is_gguf_repo(repo_id: str):
-    from huggingface_hub import HfApi
     try:
         api = HfApi(token=HF_TOKEN)
         if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return False
         files = api.list_repo_files(repo_id=repo_id)
     except Exception as e:
-        print(f"Error: Failed to get {repo_id}'s info.")
-        print(e)
-        gr.Warning(f"Error: Failed to get {repo_id}'s info.")
+        print(f"Error: Failed to get {repo_id}'s info. {e}")
+        gr.Warning(f"Error: Failed to get {repo_id}'s info. {e}")
         return False
     files = [f for f in files if f.endswith(".gguf")]
     if len(files) == 0: return False
@@ -448,15 +451,13 @@ def is_gguf_repo(repo_id: str):
 
 
 def get_repo_gguf(repo_id: str):
-    from huggingface_hub import HfApi
     try:
         api = HfApi(token=HF_TOKEN)
         if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return gr.update(value="", choices=[])
         files = api.list_repo_files(repo_id=repo_id)
     except Exception as e:
-        print(f"Error: Failed to get {repo_id}'s info.")
-        print(e)
-        gr.Warning(f"Error: Failed to get {repo_id}'s info.")
+        print(f"Error: Failed to get {repo_id}'s info. {e}")
+        gr.Warning(f"Error: Failed to get {repo_id}'s info. {e}")
         return gr.update(value="", choices=[])
     files = [f for f in files if f.endswith(".gguf")]
     if len(files) == 0: return gr.update(value="", choices=[])
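Note: is_gguf_repo and get_repo_gguf get the same treatment, and the Gradio warning now carries the exception text as well. The underlying listing-and-filtering step looks roughly like this hedged sketch (the helper name is illustrative):

    from huggingface_hub import HfApi
    from typing import Union

    def list_gguf_files(repo_id: str, token: Union[str, None] = None) -> list[str]:
        # list every file in the repo and keep only the GGUF quantizations
        api = HfApi(token=token)
        files = api.list_repo_files(repo_id=repo_id)
        return [f for f in files if f.endswith(".gguf")]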
@@ -465,7 +466,7 @@ def get_repo_gguf(repo_id: str):
 
 @spaces.GPU()
 def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_file: Union[str, None]=None,
-                      is_nf4: bool=True, progress=gr.Progress(track_tqdm=True)):
+                      is_nf4: bool=True, is_lora: bool=True, progress=gr.Progress(track_tqdm=True)):
     global use_inference_client, llm_models
     use_inference_client = use_client
     try:
@@ -477,7 +478,7 @@ def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_f
         if use_inference_client:
            pass #
        else:
-            load_text_model(model_name, gguf_file, is_nf4)
+            load_text_model(model_name, gguf_file, is_nf4, is_lora)
        if model_name not in llm_models: llm_models[model_name] = gguf_file if gguf_file else None
        return gr.update(choices=get_text_model())
    except Exception as e:
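Note: change_text_model just threads the two checkbox states through to load_text_model, so the UI toggles map one-to-one onto the is_nf4 and is_lora keyword arguments. A hedged usage sketch with example values:

    # argument order matches the Gradio inputs list in app.py
    change_text_model(
        model_name="unsloth/Meta-Llama-3.1-8B-Instruct",
        use_client=False,
        gguf_file=None,
        is_nf4=False,   # NF4 currently bypasses the LoRA merge path
        is_lora=True,
    )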
 