rashmi committed on
Commit
e28c898
1 Parent(s): 7ebfc36
Files changed (1)
  1. app.py +121 -1
app.py CHANGED
@@ -41,12 +41,132 @@ theme = gr.themes.Monochrome(
     font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
 )
 
+### Load the model
+class CFG:
+    num_workers = os.cpu_count()
+    llm_backbone = "HuggingFaceH4/zephyr-7b-beta"
+    # tokenizer_path = "HuggingFaceH4/zephyr-7b-beta"
+    tokenizer_path = "/home/rashmi/Documents/kaggle/h2oai_predict_llm/src/models_exp56/tokenizer"
+    tokenizer = AutoTokenizer.from_pretrained(
+        tokenizer_path, add_prefix_space=False, use_fast=True, trust_remote_code=True, add_eos_token=True
+    )
+    batch_size = 1
+    max_len = 650
+    seed = 42
+
+    num_labels = 7
+
+    lora = True
+    lora_r = 4
+    lora_alpha = 16
+    lora_dropout = 0.05
+    lora_target_modules = ""
+    gradient_checkpointing = True
+
+
+class CustomModel(nn.Module):
+    """
+    Model for causal language modeling problem type.
+    """
+
+    def __init__(self):
+        super().__init__()
+
+        self.backbone_config = AutoConfig.from_pretrained(
+            CFG.llm_backbone, trust_remote_code=True
+        )
+
+        quantization_config = BitsAndBytesConfig(
+            load_in_4bit=True,
+            bnb_4bit_compute_dtype=torch.float16,
+            bnb_4bit_quant_type="nf4",
+        )
+
+        self.model = AutoModelForCausalLM.from_pretrained(
+            CFG.llm_backbone,
+            config=self.backbone_config,
+            quantization_config=quantization_config,
+        )
+
+        if CFG.lora:
+            target_modules = []
+            for name, module in self.model.named_modules():
+                if (
+                    isinstance(module, (torch.nn.Linear, torch.nn.Conv1d))
+                    and "head" not in name
+                ):
+                    name = name.split(".")[-1]
+                    if name not in target_modules:
+                        target_modules.append(name)
+
+            lora_config = LoraConfig(
+                r=CFG.lora_r,
+                lora_alpha=CFG.lora_alpha,
+                target_modules=target_modules,
+                lora_dropout=CFG.lora_dropout,
+                bias="none",
+                task_type="CAUSAL_LM",
+            )
+            if CFG.gradient_checkpointing:
+                self.model.enable_input_require_grads()
+            self.model = get_peft_model(self.model, lora_config)
+            self.model.print_trainable_parameters()
+
+        self.classification_head = nn.Linear(
+            self.backbone_config.vocab_size, CFG.num_labels, bias=False
+        )
+        self._init_weights(self.classification_head)
+
+    def _init_weights(self, module):
+        if isinstance(module, nn.Linear):
+            module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
+            if module.bias is not None:
+                module.bias.data.zero_()
+        elif isinstance(module, nn.Embedding):
+            module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
+            if module.padding_idx is not None:
+                module.weight.data[module.padding_idx].zero_()
+        elif isinstance(module, nn.LayerNorm):
+            module.bias.data.zero_()
+            module.weight.data.fill_(1.0)
+
+    def forward(
+        self,
+        batch
+    ):
+        # disable cache if gradient checkpointing is enabled
+        if CFG.gradient_checkpointing:
+            self.model.config.use_cache = False
+
+        self.model.config.pretraining_tp = 1
+
+        output = self.model(
+            input_ids=batch["input_ids"],
+            attention_mask=batch["attention_mask"],
+        )
+
+        output.logits = self.classification_head(output[0][:, -1].float())
+
+        # enable cache again if gradient checkpointing is enabled
+        if CFG.gradient_checkpointing:
+            self.model.config.use_cache = True
+
+        return output.logits
+
+model = CustomModel()
+
+
+
+### End Load the model
+
+
+
+
 def do_submit(question, response):
     full_text = question + " " + response
     # result = do_inference(full_text)
     return "result"
 
-
 @spaces.GPU
 def greet():
     pass
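
The commit wires the new model into the UI only partially: do_submit still returns a hard-coded "result" and leaves the do_inference(full_text) call commented out, with no do_inference defined in this diff. Below is a minimal sketch of what such a helper could look like, assuming the imports already present at the top of app.py (torch and the transformers/peft classes used above); the device move and the returned label index are placeholder choices, since the mapping from the 7 classes to display text is not part of this commit.

# Not part of this commit: hypothetical do_inference sketch for the call
# that do_submit leaves commented out.
def do_inference(full_text):
    # Tokenize the concatenated question + response, truncating to CFG.max_len.
    inputs = CFG.tokenizer(
        full_text,
        truncation=True,
        max_length=CFG.max_len,
        return_tensors="pt",
    )
    # Assumes the quantized backbone reports its device; move inputs to match.
    device = model.model.device
    batch = {
        "input_ids": inputs["input_ids"].to(device),
        "attention_mask": inputs["attention_mask"].to(device),
    }
    with torch.no_grad():
        # CustomModel.forward returns the classification head's logits,
        # shape (batch_size, CFG.num_labels).
        logits = model(batch)
    # Placeholder post-processing: return the index of the highest-scoring label.
    return int(logits.argmax(dim=-1).item())

With a helper along these lines, the commented-out line in do_submit would become result = do_inference(full_text) and the function would return that result instead of the literal string.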