Spaces:
Paused
Paused
File size: 2,924 Bytes
ad6330a ef08154 ad6330a 8bf0b67 ad6330a |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
import subprocess
import os, requests
import torch, torchvision
import spaces
from huggingface_hub import login
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, LlavaNextForConditionalGeneration, LlavaForConditionalGeneration, PaliGemmaForConditionalGeneration, Idefics2ForConditionalGeneration
# Install required package
def install_flash_attn():
    """Install the ``flash-attn`` package with pip at runtime.

    pip is run through the current interpreter so the package is installed
    into the environment this process is using. The child process inherits
    the current environment with ``FLASH_ATTENTION_SKIP_CUDA_BUILD=TRUE``
    added on top (presumably this makes flash-attn's build skip compiling
    its CUDA extension -- confirm against the flash-attn setup script).

    The installation is best effort: pip's exit status is not checked and
    nothing is returned.
    """
    import sys  # local import: only needed to locate the running interpreter

    subprocess.run(
        [
            sys.executable,
            "-m",
            "pip",
            "install",
            "flash-attn",
            "--no-build-isolation",
        ],
        # Extend the inherited environment instead of replacing it; passing
        # only the single variable would drop PATH, HOME, proxy settings, etc.
        env={**os.environ, "FLASH_ATTENTION_SKIP_CUDA_BUILD": "TRUE"},
        check=False,
    )
# Authenticate with Hugging Face
def authenticate_hf(token):
    """Log in to the Hugging Face Hub with the given access token.

    Args:
        token: Access token forwarded to ``huggingface_hub.login``. The
            login is performed with ``add_to_git_credential=True``.
    """
    login(
        token=token,
        add_to_git_credential=True,
    )
# Function to get the model summary
# Cache of model name -> summary string, filled by get_model_summary().
model_cache = {}

# Architecture name (first entry of ``architectures`` in config.json) mapped
# to the transformers class used to load it. Any name not listed here is
# loaded with AutoModelForCausalLM.
_ARCHITECTURE_LOADERS = {
    "LlavaNextForConditionalGeneration": LlavaNextForConditionalGeneration,
    "LlavaForConditionalGeneration": LlavaForConditionalGeneration,
    "PaliGemmaForConditionalGeneration": PaliGemmaForConditionalGeneration,
    "Idefics2ForConditionalGeneration": Idefics2ForConditionalGeneration,
}


@spaces.GPU
def get_model_summary(model_name):
    """Return the printed module structure of a Hugging Face model.

    The model's ``config.json`` is fetched from the Hub to choose a loader
    class, the model is loaded, and ``str(model)`` is stored in
    ``model_cache`` under ``model_name`` so later calls skip the download.

    Args:
        model_name: Hub repository id used both in the config URL and as
            the ``from_pretrained`` identifier.

    Returns:
        A ``(summary, error)`` tuple of strings. On success ``error`` is
        ``""``. On any failure ``summary`` is ``""`` and ``error`` is the
        exception message; exceptions are never propagated to the caller.
    """
    if model_name in model_cache:
        return model_cache[model_name], ""

    try:
        config_url = f"https://huggingface.co/{model_name}/raw/main/config.json"

        # Only send credentials when a token is configured; with HF_TOKEN
        # unset the header would otherwise be the literal "Bearer None".
        token = os.getenv("HF_TOKEN")
        headers = {"Authorization": f"Bearer {token}"} if token else {}

        # A timeout keeps a stalled connection from blocking the call forever.
        response = requests.get(config_url, headers=headers, timeout=30)
        response.raise_for_status()
        config = response.json()

        # A missing or empty "architectures" entry falls through to the
        # AutoModelForCausalLM default, like any unrecognised architecture.
        architectures = config.get("architectures") or [None]
        loader = _ARCHITECTURE_LOADERS.get(architectures[0], AutoModelForCausalLM)

        # Quantization is inferred from the repository name only.
        is_quantized = "quantized" in model_name.lower()

        # NOTE(review): trust_remote_code=True runs code shipped in the model
        # repository; model_name should come from a trusted source.
        load_kwargs = {"trust_remote_code": True}
        if is_quantized:
            # The bitsandbytes settings belong in ``quantization_config``;
            # ``config`` is reserved for the model's own configuration.
            load_kwargs["quantization_config"] = BitsAndBytesConfig(load_in_4bit=True)

        model = loader.from_pretrained(model_name, **load_kwargs)

        # Move to device only if the model is not quantized.
        if not is_quantized:
            device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
            model = model.to(device)

        model_summary = str(model)
        model_cache[model_name] = model_summary
        return model_summary, ""
    except Exception as e:
        # Errors are reported through the second tuple element, not raised.
        return "", str(e)
|