import gradio as gr
import spaces
import os
import gc
import random
import warnings

warnings.filterwarnings("ignore")
import numpy as np
import pandas as pd

pd.set_option("display.max_rows", 500)
pd.set_option("display.max_columns", 500)
pd.set_option("display.width", 1000)
from tqdm.auto import tqdm
import torch
import torch.nn as nn
import tokenizers
import transformers

print(f"tokenizers.__version__: {tokenizers.__version__}")
print(f"transformers.__version__: {transformers.__version__}")
print(f"torch.__version__: {torch.__version__}")
print(f"torch cuda version: {torch.version.cuda}")
from transformers import AutoTokenizer, AutoConfig
from transformers import BitsAndBytesConfig, AutoModelForCausalLM, MistralForCausalLM
from peft import LoraConfig, get_peft_model
title = "H2O AI Predict the LLM" | |
#Theme from - https://huggingface.co/spaces/trl-lib/stack-llama/blob/main/app.py | |
theme = gr.themes.Monochrome( | |
primary_hue="indigo", | |
secondary_hue="blue", | |
neutral_hue="slate", | |
radius_size=gr.themes.sizes.radius_sm, | |
font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"], | |
) | |
### Load the model
class CFG:
    num_workers = os.cpu_count()
    llm_backbone = "HuggingFaceH4/zephyr-7b-beta"
    tokenizer_path = "HuggingFaceH4/zephyr-7b-beta"
    tokenizer = AutoTokenizer.from_pretrained(
        tokenizer_path, add_prefix_space=False, use_fast=True, trust_remote_code=True, add_eos_token=True
    )
    batch_size = 1
    max_len = 650
    seed = 42
    num_labels = 7
    lora = True
    lora_r = 4
    lora_alpha = 16
    lora_dropout = 0.05
    lora_target_modules = ""
    gradient_checkpointing = True
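
# CFG.seed is declared above but never applied in this file. A minimal seeding
# helper is sketched here as an illustration (an assumption, not part of the
# original app); calling it at startup would make the random example sampling
# below reproducible.
def seed_everything(seed: int = CFG.seed):
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)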
class CustomModel(nn.Module):
    """
    Model for causal language modeling problem type.
    """

    def __init__(self):
        super().__init__()
        self.backbone_config = AutoConfig.from_pretrained(
            CFG.llm_backbone, trust_remote_code=True
        )
        quantization_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_compute_dtype=torch.float16,
            bnb_4bit_quant_type="nf4",
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            CFG.llm_backbone,
            config=self.backbone_config,
            quantization_config=quantization_config,
        )
        if CFG.lora:
            # collect every Linear/Conv1d layer (except heads) as a LoRA target
            target_modules = []
            for name, module in self.model.named_modules():
                if (
                    isinstance(module, (torch.nn.Linear, torch.nn.Conv1d))
                    and "head" not in name
                ):
                    name = name.split(".")[-1]
                    if name not in target_modules:
                        target_modules.append(name)
            lora_config = LoraConfig(
                r=CFG.lora_r,
                lora_alpha=CFG.lora_alpha,
                target_modules=target_modules,
                lora_dropout=CFG.lora_dropout,
                bias="none",
                task_type="CAUSAL_LM",
            )
            if CFG.gradient_checkpointing:
                self.model.enable_input_require_grads()
            self.model = get_peft_model(self.model, lora_config)
            self.model.print_trainable_parameters()
        # maps the vocabulary logits of the last token to CFG.num_labels classes
        self.classification_head = nn.Linear(
            self.backbone_config.vocab_size, CFG.num_labels, bias=False
        )
        self._init_weights(self.classification_head)

    def _init_weights(self, module):
        if isinstance(module, nn.Linear):
            module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
            if module.padding_idx is not None:
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, batch):
        # disable cache if gradient checkpointing is enabled
        if CFG.gradient_checkpointing:
            self.model.config.use_cache = False
        self.model.config.pretraining_tp = 1
        output = self.model(
            input_ids=batch["input_ids"],
            attention_mask=batch["attention_mask"],
        )
        # classify from the last token's vocabulary logits
        output.logits = self.classification_head(output[0][:, -1].float())
        # enable cache again if gradient checkpointing is enabled
        if CFG.gradient_checkpointing:
            self.model.config.use_cache = True
        return output.logits


model = CustomModel()
### End Load the model
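
# The do_inference() call in do_submit() below is left commented out in this
# Space. The sketch here is an assumption of how it might look: it presumes
# that fine-tuned weights for CustomModel have already been loaded (no
# checkpoint is loaded in this file) and that the predicted class index maps
# directly to the "LLM #" shown in the UI.
@spaces.GPU  # ZeroGPU decorator; `spaces` is imported above but otherwise unused
def do_inference(full_text):
    inputs = CFG.tokenizer(
        full_text,
        max_length=CFG.max_len,
        truncation=True,
        return_tensors="pt",
    )
    device = next(model.parameters()).device
    batch = {
        "input_ids": inputs["input_ids"].to(device),
        "attention_mask": inputs["attention_mask"].to(device),
    }
    with torch.no_grad():
        logits = model(batch)
    # index of the most probable of the CFG.num_labels candidate LLMs
    return str(int(logits.argmax(dim=-1).item()))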
def do_submit(question, response):
    full_text = question + " " + response
    # inference is stubbed out here; the button returns a placeholder string
    # result = do_inference(full_text)
    return "result"


def greet():
    pass
with gr.Blocks(title=title) as demo:  # theme=theme
    sample_examples = pd.read_csv('sample_examples.csv')
    example_list = sample_examples[['Question', 'Response', 'target']].sample(2).values.tolist()

    gr.Markdown(f"## {title}")
    with gr.Row():
        # with gr.Column(scale=1):
        #     gr.Markdown("### Question and LLM Response")
        question_text = gr.Textbox(lines=2, placeholder="Question:", label="")
        response_text = gr.Textbox(lines=2, placeholder="Response:", label="")
        target_text = gr.Textbox(lines=1, placeholder="Target:", label="", interactive=False, visible=False)
        llm_num = gr.Textbox(value="", label="LLM #")
    with gr.Row():
        sub_btn = gr.Button("Submit")
        sub_btn.click(fn=do_submit, inputs=[question_text, response_text], outputs=[llm_num])

    gr.Markdown("## Sample Inputs:")
    gr.Examples(
        example_list,
        [question_text, response_text, target_text],
        # cache_examples=True,
    )

demo.launch()