Spaces:
Running
on
Zero
Running
on
Zero
Upload 4 files
Browse files- app.py +4 -2
- joycaption.py +38 -37
app.py
CHANGED
@@ -4,7 +4,8 @@ from joycaption import stream_chat_mod, get_text_model, change_text_model, get_r
|
|
4 |
|
5 |
JC_TITLE_MD = "<h1><center>JoyCaption Alpha Two Mod</center></h1>"
|
6 |
JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-two](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two),
|
7 |
-
[Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha).
|
|
|
8 |
|
9 |
css = """
|
10 |
.info {text-align:center; !important}
|
@@ -65,6 +66,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
|
|
65 |
jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
|
66 |
allow_custom_value=True, min_width=320, visible=False)
|
67 |
jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
|
|
|
68 |
jc_text_model_button = gr.Button("Load Model", variant="secondary", visible=False)
|
69 |
jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
|
70 |
with gr.Row():
|
@@ -81,7 +83,7 @@ with gr.Blocks(fill_width=True, css=css, delete_cache=(60, 3600)) as demo:
|
|
81 |
|
82 |
jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_length, jc_extra_options, jc_name_input, jc_custom_prompt,
|
83 |
jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_prompt, jc_output_caption])
|
84 |
-
jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
|
85 |
#jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
|
86 |
#jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
|
87 |
#jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
|
|
|
4 |
|
5 |
JC_TITLE_MD = "<h1><center>JoyCaption Alpha Two Mod</center></h1>"
|
6 |
JC_DESC_MD = """This space is mod of [fancyfeast/joy-caption-alpha-two](https://huggingface.co/spaces/fancyfeast/joy-caption-alpha-two),
|
7 |
+
[Wi-zz/joy-caption-pre-alpha](https://huggingface.co/Wi-zz/joy-caption-pre-alpha).
|
8 |
+
Thanks to [dominic1021](https://huggingface.co/dominic1021), [IceHibiki](https://huggingface.co/IceHibiki)."""
|
9 |
|
10 |
css = """
|
11 |
.info {text-align:center; !important}
|
|
|
66 |
jc_gguf = gr.Dropdown(label=f"GGUF Filename", choices=[], value="",
|
67 |
allow_custom_value=True, min_width=320, visible=False)
|
68 |
jc_nf4 = gr.Checkbox(label="Use NF4 quantization", value=True)
|
69 |
+
jc_lora = gr.Checkbox(label="Use Custom VLM", info="Llama 3 BF16 only", value=True)
|
70 |
jc_text_model_button = gr.Button("Load Model", variant="secondary", visible=False)
|
71 |
jc_use_inference_client = gr.Checkbox(label="Use Inference Client", value=False, visible=False)
|
72 |
with gr.Row():
|
|
|
83 |
|
84 |
jc_run_button.click(fn=stream_chat_mod, inputs=[jc_input_image, jc_caption_type, jc_caption_length, jc_extra_options, jc_name_input, jc_custom_prompt,
|
85 |
jc_tokens, jc_topp, jc_temperature, jc_text_model], outputs=[jc_output_prompt, jc_output_caption])
|
86 |
+
jc_text_model.change(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4, jc_lora], [jc_text_model], show_api=False)
|
87 |
#jc_text_model_button.click(change_text_model, [jc_text_model, jc_use_inference_client, jc_gguf, jc_nf4], [jc_text_model], show_api=False)
|
88 |
#jc_text_model.change(get_repo_gguf, [jc_text_model], [jc_gguf], show_api=False)
|
89 |
#jc_use_inference_client.change(change_text_model, [jc_text_model, jc_use_inference_client], [jc_text_model], show_api=False)
|
joycaption.py
CHANGED
@@ -9,7 +9,7 @@ else:
|
|
9 |
return func(*args, **kwargs)
|
10 |
return wrapper
|
11 |
import gradio as gr
|
12 |
-
from huggingface_hub import InferenceClient
|
13 |
from torch import nn
|
14 |
from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, LlavaForConditionalGeneration
|
15 |
from pathlib import Path
|
@@ -18,11 +18,15 @@ import torch.amp.autocast_mode
|
|
18 |
from PIL import Image
|
19 |
import torchvision.transforms.functional as TVF
|
20 |
import gc
|
21 |
-
from peft import
|
22 |
from typing import Union
|
23 |
|
24 |
-
|
25 |
-
|
|
|
|
|
|
|
|
|
26 |
|
27 |
BASE_DIR = Path(__file__).resolve().parent # Define the base directory
|
28 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
@@ -38,7 +42,8 @@ llm_models = {
|
|
38 |
"unsloth/Meta-Llama-3.1-8B-bnb-4bit": None,
|
39 |
"DevQuasar/HermesNova-Llama-3.1-8B": None,
|
40 |
"mergekit-community/L3.1-Boshima-b-FIX": None,
|
41 |
-
"
|
|
|
42 |
}
|
43 |
|
44 |
CLIP_PATH = "google/siglip-so400m-patch14-384"
|
@@ -158,25 +163,26 @@ class ImageAdapter(nn.Module):
|
|
158 |
# https://huggingface.co/docs/transformers/main/en/peft#enable-and-disable-adapters
|
159 |
# https://huggingface.co/docs/transformers/main/quantization/bitsandbytes?bnb=4-bit
|
160 |
# https://huggingface.co/lllyasviel/flux1-dev-bnb-nf4
|
|
|
|
|
161 |
tokenizer = None
|
162 |
text_model_client = None
|
163 |
text_model = None
|
164 |
image_adapter = None
|
165 |
-
peft_config = None
|
166 |
pixtral_model = None
|
167 |
pixtral_processor = None
|
168 |
-
def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True):
|
169 |
-
global tokenizer, text_model, image_adapter,
|
170 |
try:
|
171 |
tokenizer = None
|
172 |
text_model_client = None
|
173 |
text_model = None
|
174 |
image_adapter = None
|
175 |
-
peft_config = None
|
176 |
pixtral_model = None
|
177 |
pixtral_processor = None
|
178 |
torch.cuda.empty_cache()
|
179 |
gc.collect()
|
|
|
180 |
|
181 |
from transformers import BitsAndBytesConfig
|
182 |
nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
|
@@ -202,23 +208,25 @@ def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None
|
|
202 |
if device == "cpu":
|
203 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
204 |
elif is_nf4:
|
205 |
-
text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
206 |
else:
|
207 |
-
text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=
|
208 |
else:
|
209 |
if device == "cpu":
|
210 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
211 |
elif is_nf4:
|
212 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
213 |
else:
|
214 |
-
text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=
|
215 |
|
216 |
-
if LORA_PATH.exists():
|
217 |
print("Loading VLM's custom text model")
|
218 |
-
if is_nf4:
|
219 |
-
|
220 |
-
|
221 |
-
|
|
|
|
|
222 |
|
223 |
print("Loading image adapter")
|
224 |
image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
|
@@ -237,6 +245,7 @@ load_text_model.zerogpu = True
|
|
237 |
print("Loading CLIP")
|
238 |
clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
|
239 |
clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
|
|
|
240 |
if (CHECKPOINT_PATH / "clip_model.pt").exists():
|
241 |
print("Loading VLM's custom vision model")
|
242 |
checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=False)
|
@@ -251,15 +260,15 @@ clip_model.eval().requires_grad_(False).to(device)
|
|
251 |
#load_text_model(PIXTRAL_PATHS[0])
|
252 |
#print(f"pixtral_model: {type(pixtral_model)}") #
|
253 |
#print(f"pixtral_processor: {type(pixtral_processor)}") #
|
254 |
-
load_text_model()
|
255 |
-
print(f"pixtral_model: {type(pixtral_model)}") #
|
256 |
-
print(f"pixtral_processor: {type(pixtral_processor)}") #
|
257 |
|
258 |
@spaces.GPU()
|
259 |
@torch.inference_mode()
|
260 |
def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length: Union[str, int], extra_options: list[str], name_input: str, custom_prompt: str,
|
261 |
max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> tuple[str, str]:
|
262 |
-
global tokenizer, text_model, image_adapter,
|
263 |
torch.cuda.empty_cache()
|
264 |
gc.collect()
|
265 |
|
@@ -302,7 +311,6 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length:
|
|
302 |
print(f"pixtral_model: {type(pixtral_model)}") #
|
303 |
print(f"pixtral_processor: {type(pixtral_processor)}") #
|
304 |
input_images = [input_image.convert("RGB")]
|
305 |
-
#input_prompt = f"[INST]{prompt_str}\n[IMG][/INST]"
|
306 |
input_prompt = "[INST]Caption this image:\n[IMG][/INST]"
|
307 |
inputs = pixtral_processor(images=input_images, text=input_prompt, return_tensors="pt").to(device)
|
308 |
generate_ids = pixtral_model.generate(**inputs, max_new_tokens=max_new_tokens)
|
@@ -373,7 +381,7 @@ def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length:
|
|
373 |
attention_mask = torch.ones_like(input_ids)
|
374 |
|
375 |
# Debugging
|
376 |
-
print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
|
377 |
|
378 |
text_model.to(device)
|
379 |
generate_ids = text_model.generate(input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
|
@@ -403,19 +411,16 @@ def is_repo_name(s):
|
|
403 |
|
404 |
|
405 |
def is_repo_exists(repo_id):
|
406 |
-
from huggingface_hub import HfApi
|
407 |
try:
|
408 |
api = HfApi(token=HF_TOKEN)
|
409 |
if api.repo_exists(repo_id=repo_id): return True
|
410 |
else: return False
|
411 |
except Exception as e:
|
412 |
-
print(f"Error: Failed to connect {repo_id}.")
|
413 |
-
print(e)
|
414 |
return True # for safe
|
415 |
|
416 |
|
417 |
def is_valid_repo(repo_id):
|
418 |
-
from huggingface_hub import HfApi
|
419 |
import re
|
420 |
try:
|
421 |
if not re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', repo_id): return False
|
@@ -432,15 +437,13 @@ def get_text_model():
|
|
432 |
|
433 |
|
434 |
def is_gguf_repo(repo_id: str):
|
435 |
-
from huggingface_hub import HfApi
|
436 |
try:
|
437 |
api = HfApi(token=HF_TOKEN)
|
438 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return False
|
439 |
files = api.list_repo_files(repo_id=repo_id)
|
440 |
except Exception as e:
|
441 |
-
print(f"Error: Failed to get {repo_id}'s info.")
|
442 |
-
|
443 |
-
gr.Warning(f"Error: Failed to get {repo_id}'s info.")
|
444 |
return False
|
445 |
files = [f for f in files if f.endswith(".gguf")]
|
446 |
if len(files) == 0: return False
|
@@ -448,15 +451,13 @@ def is_gguf_repo(repo_id: str):
|
|
448 |
|
449 |
|
450 |
def get_repo_gguf(repo_id: str):
|
451 |
-
from huggingface_hub import HfApi
|
452 |
try:
|
453 |
api = HfApi(token=HF_TOKEN)
|
454 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return gr.update(value="", choices=[])
|
455 |
files = api.list_repo_files(repo_id=repo_id)
|
456 |
except Exception as e:
|
457 |
-
print(f"Error: Failed to get {repo_id}'s info.")
|
458 |
-
|
459 |
-
gr.Warning(f"Error: Failed to get {repo_id}'s info.")
|
460 |
return gr.update(value="", choices=[])
|
461 |
files = [f for f in files if f.endswith(".gguf")]
|
462 |
if len(files) == 0: return gr.update(value="", choices=[])
|
@@ -465,7 +466,7 @@ def get_repo_gguf(repo_id: str):
|
|
465 |
|
466 |
@spaces.GPU()
|
467 |
def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_file: Union[str, None]=None,
|
468 |
-
is_nf4: bool=True, progress=gr.Progress(track_tqdm=True)):
|
469 |
global use_inference_client, llm_models
|
470 |
use_inference_client = use_client
|
471 |
try:
|
@@ -477,7 +478,7 @@ def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_f
|
|
477 |
if use_inference_client:
|
478 |
pass #
|
479 |
else:
|
480 |
-
load_text_model(model_name, gguf_file, is_nf4)
|
481 |
if model_name not in llm_models: llm_models[model_name] = gguf_file if gguf_file else None
|
482 |
return gr.update(choices=get_text_model())
|
483 |
except Exception as e:
|
|
|
9 |
return func(*args, **kwargs)
|
10 |
return wrapper
|
11 |
import gradio as gr
|
12 |
+
from huggingface_hub import InferenceClient, HfApi
|
13 |
from torch import nn
|
14 |
from transformers import AutoModel, AutoProcessor, AutoTokenizer, PreTrainedTokenizer, PreTrainedTokenizerFast, AutoModelForCausalLM, LlavaForConditionalGeneration
|
15 |
from pathlib import Path
|
|
|
18 |
from PIL import Image
|
19 |
import torchvision.transforms.functional as TVF
|
20 |
import gc
|
21 |
+
from peft import PeftModel
|
22 |
from typing import Union
|
23 |
|
24 |
+
LOAD_IN_NF4 = True
|
25 |
+
|
26 |
+
if os.environ.get("SPACES_ZERO_GPU") is not None:
|
27 |
+
import subprocess
|
28 |
+
LOAD_IN_NF4 = False # If true, Custom VLM LoRA doesn't work initially. The rest are fine.
|
29 |
+
subprocess.run('pip install flash-attn --no-build-isolation', env={'FLASH_ATTENTION_SKIP_CUDA_BUILD': "TRUE"}, shell=True)
|
30 |
|
31 |
BASE_DIR = Path(__file__).resolve().parent # Define the base directory
|
32 |
device = "cuda" if torch.cuda.is_available() else "cpu"
|
|
|
42 |
"unsloth/Meta-Llama-3.1-8B-bnb-4bit": None,
|
43 |
"DevQuasar/HermesNova-Llama-3.1-8B": None,
|
44 |
"mergekit-community/L3.1-Boshima-b-FIX": None,
|
45 |
+
#"chuanli11/Llama-3.2-3B-Instruct-uncensored": None, # Error(s) in loading state_dict for ImageAdapter:\n\tsize mismatch for linear1.weight: copying a param with shape torch.Size([4096, 1152]) from checkpoint, the shape in current model is torch.Size([3072, 1152]).\n\tsize mismatch for linear1.bias: copying a param with shape torch.Size([4096]) from checkpoint,
|
46 |
+
"unsloth/Meta-Llama-3.1-8B-Instruct": None,
|
47 |
}
|
48 |
|
49 |
CLIP_PATH = "google/siglip-so400m-patch14-384"
|
|
|
163 |
# https://huggingface.co/docs/transformers/main/en/peft#enable-and-disable-adapters
|
164 |
# https://huggingface.co/docs/transformers/main/quantization/bitsandbytes?bnb=4-bit
|
165 |
# https://huggingface.co/lllyasviel/flux1-dev-bnb-nf4
|
166 |
+
# https://github.com/huggingface/transformers/issues/28515
|
167 |
+
# https://gist.github.com/ChrisHayduk/1a53463331f52dca205e55982baf9930
|
168 |
tokenizer = None
|
169 |
text_model_client = None
|
170 |
text_model = None
|
171 |
image_adapter = None
|
|
|
172 |
pixtral_model = None
|
173 |
pixtral_processor = None
|
174 |
+
def load_text_model(model_name: str=MODEL_PATH, gguf_file: Union[str, None]=None, is_nf4: bool=True, is_lora: bool=True):
|
175 |
+
global tokenizer, text_model, image_adapter, pixtral_model, pixtral_processor, text_model_client, use_inference_client
|
176 |
try:
|
177 |
tokenizer = None
|
178 |
text_model_client = None
|
179 |
text_model = None
|
180 |
image_adapter = None
|
|
|
181 |
pixtral_model = None
|
182 |
pixtral_processor = None
|
183 |
torch.cuda.empty_cache()
|
184 |
gc.collect()
|
185 |
+
lora_device = "auto"
|
186 |
|
187 |
from transformers import BitsAndBytesConfig
|
188 |
nf4_config = BitsAndBytesConfig(load_in_4bit=True, bnb_4bit_quant_type="nf4",
|
|
|
208 |
if device == "cpu":
|
209 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
210 |
elif is_nf4:
|
211 |
+
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
212 |
else:
|
213 |
+
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=lora_device, torch_dtype=torch.bfloat16).eval()
|
214 |
else:
|
215 |
if device == "cpu":
|
216 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, gguf_file=gguf_file, device_map=device, torch_dtype=torch.bfloat16).eval()
|
217 |
elif is_nf4:
|
218 |
text_model = AutoModelForCausalLM.from_pretrained(model_name, quantization_config=nf4_config, device_map=device, torch_dtype=torch.bfloat16).eval()
|
219 |
else:
|
220 |
+
text_model = AutoModelForCausalLM.from_pretrained(model_name, device_map=lora_device, torch_dtype=torch.bfloat16).eval()
|
221 |
|
222 |
+
if is_lora and LORA_PATH.exists() and not is_nf4:
|
223 |
print("Loading VLM's custom text model")
|
224 |
+
if is_nf4: # omitted
|
225 |
+
text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device, quantization_config=nf4_config)
|
226 |
+
else:
|
227 |
+
text_model = PeftModel.from_pretrained(model=text_model, model_id=LORA_PATH, device_map=device)
|
228 |
+
text_model = text_model.merge_and_unload(safe_merge=True) # to avoid PEFT bug https://github.com/huggingface/transformers/issues/28515
|
229 |
+
else: print("VLM's custom text model is not loaded")
|
230 |
|
231 |
print("Loading image adapter")
|
232 |
image_adapter = ImageAdapter(clip_model.config.hidden_size, text_model.config.hidden_size, False, False, 38, False).eval().to("cpu")
|
|
|
245 |
print("Loading CLIP")
|
246 |
clip_processor = AutoProcessor.from_pretrained(CLIP_PATH)
|
247 |
clip_model = AutoModel.from_pretrained(CLIP_PATH).vision_model
|
248 |
+
assert (CHECKPOINT_PATH / "clip_model.pt").exists()
|
249 |
if (CHECKPOINT_PATH / "clip_model.pt").exists():
|
250 |
print("Loading VLM's custom vision model")
|
251 |
checkpoint = torch.load(CHECKPOINT_PATH / "clip_model.pt", map_location='cpu', weights_only=False)
|
|
|
260 |
#load_text_model(PIXTRAL_PATHS[0])
|
261 |
#print(f"pixtral_model: {type(pixtral_model)}") #
|
262 |
#print(f"pixtral_processor: {type(pixtral_processor)}") #
|
263 |
+
load_text_model(MODEL_PATH, None, LOAD_IN_NF4, True)
|
264 |
+
#print(f"pixtral_model: {type(pixtral_model)}") #
|
265 |
+
#print(f"pixtral_processor: {type(pixtral_processor)}") #
|
266 |
|
267 |
@spaces.GPU()
|
268 |
@torch.inference_mode()
|
269 |
def stream_chat_mod(input_image: Image.Image, caption_type: str, caption_length: Union[str, int], extra_options: list[str], name_input: str, custom_prompt: str,
|
270 |
max_new_tokens: int=300, top_p: float=0.9, temperature: float=0.6, model_name: str=MODEL_PATH, progress=gr.Progress(track_tqdm=True)) -> tuple[str, str]:
|
271 |
+
global tokenizer, text_model, image_adapter, pixtral_model, pixtral_processor, text_model_client, use_inference_client
|
272 |
torch.cuda.empty_cache()
|
273 |
gc.collect()
|
274 |
|
|
|
311 |
print(f"pixtral_model: {type(pixtral_model)}") #
|
312 |
print(f"pixtral_processor: {type(pixtral_processor)}") #
|
313 |
input_images = [input_image.convert("RGB")]
|
|
|
314 |
input_prompt = "[INST]Caption this image:\n[IMG][/INST]"
|
315 |
inputs = pixtral_processor(images=input_images, text=input_prompt, return_tensors="pt").to(device)
|
316 |
generate_ids = pixtral_model.generate(**inputs, max_new_tokens=max_new_tokens)
|
|
|
381 |
attention_mask = torch.ones_like(input_ids)
|
382 |
|
383 |
# Debugging
|
384 |
+
#print(f"Input to model: {repr(tokenizer.decode(input_ids[0]))}")
|
385 |
|
386 |
text_model.to(device)
|
387 |
generate_ids = text_model.generate(input_ids, inputs_embeds=input_embeds, attention_mask=attention_mask, max_new_tokens=max_new_tokens,
|
|
|
411 |
|
412 |
|
413 |
def is_repo_exists(repo_id):
|
|
|
414 |
try:
|
415 |
api = HfApi(token=HF_TOKEN)
|
416 |
if api.repo_exists(repo_id=repo_id): return True
|
417 |
else: return False
|
418 |
except Exception as e:
|
419 |
+
print(f"Error: Failed to connect {repo_id}. {e}")
|
|
|
420 |
return True # for safe
|
421 |
|
422 |
|
423 |
def is_valid_repo(repo_id):
|
|
|
424 |
import re
|
425 |
try:
|
426 |
if not re.fullmatch(r'^[^/,\s\"\']+/[^/,\s\"\']+$', repo_id): return False
|
|
|
437 |
|
438 |
|
439 |
def is_gguf_repo(repo_id: str):
|
|
|
440 |
try:
|
441 |
api = HfApi(token=HF_TOKEN)
|
442 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return False
|
443 |
files = api.list_repo_files(repo_id=repo_id)
|
444 |
except Exception as e:
|
445 |
+
print(f"Error: Failed to get {repo_id}'s info. {e}")
|
446 |
+
gr.Warning(f"Error: Failed to get {repo_id}'s info. {e}")
|
|
|
447 |
return False
|
448 |
files = [f for f in files if f.endswith(".gguf")]
|
449 |
if len(files) == 0: return False
|
|
|
451 |
|
452 |
|
453 |
def get_repo_gguf(repo_id: str):
|
|
|
454 |
try:
|
455 |
api = HfApi(token=HF_TOKEN)
|
456 |
if not is_repo_name(repo_id) or not is_repo_exists(repo_id): return gr.update(value="", choices=[])
|
457 |
files = api.list_repo_files(repo_id=repo_id)
|
458 |
except Exception as e:
|
459 |
+
print(f"Error: Failed to get {repo_id}'s info. {e}")
|
460 |
+
gr.Warning(f"Error: Failed to get {repo_id}'s info. {e}")
|
|
|
461 |
return gr.update(value="", choices=[])
|
462 |
files = [f for f in files if f.endswith(".gguf")]
|
463 |
if len(files) == 0: return gr.update(value="", choices=[])
|
|
|
466 |
|
467 |
@spaces.GPU()
|
468 |
def change_text_model(model_name: str=MODEL_PATH, use_client: bool=False, gguf_file: Union[str, None]=None,
|
469 |
+
is_nf4: bool=True, is_lora: bool=True, progress=gr.Progress(track_tqdm=True)):
|
470 |
global use_inference_client, llm_models
|
471 |
use_inference_client = use_client
|
472 |
try:
|
|
|
478 |
if use_inference_client:
|
479 |
pass #
|
480 |
else:
|
481 |
+
load_text_model(model_name, gguf_file, is_nf4, is_lora)
|
482 |
if model_name not in llm_models: llm_models[model_name] = gguf_file if gguf_file else None
|
483 |
return gr.update(choices=get_text_model())
|
484 |
except Exception as e:
|