rashmi committed
Commit c1671d1
1 Parent(s): 4b09f99
Files changed (1)
  1. app.py +97 -97
app.py CHANGED
@@ -42,118 +42,118 @@ theme = gr.themes.Monochrome(
  )

  ### Load the model
- class CFG:
-     num_workers = os.cpu_count()
-     llm_backbone = "HuggingFaceH4/zephyr-7b-beta"
-     tokenizer_path = "HuggingFaceH4/zephyr-7b-beta"
-     tokenizer = AutoTokenizer.from_pretrained(
-         tokenizer_path, add_prefix_space=False, use_fast=True, trust_remote_code=True, add_eos_token=True
-     )
-     batch_size = 1
-     max_len = 650
-     seed = 42

-     num_labels = 7

-     lora = True
-     lora_r = 4
-     lora_alpha = 16
-     lora_dropout = 0.05
-     lora_target_modules = ""
-     gradient_checkpointing = True


- class CustomModel(nn.Module):
-     """
-     Model for causal language modeling problem type.
-     """

-     def __init__(self):
-         super().__init__()

-         self.backbone_config = AutoConfig.from_pretrained(
-             CFG.llm_backbone, trust_remote_code=True
-         )

-         quantization_config = BitsAndBytesConfig(
-             load_in_4bit=True,
-             bnb_4bit_compute_dtype=torch.float16,
-             bnb_4bit_quant_type="nf4",
-         )

-         self.model = AutoModelForCausalLM.from_pretrained(
-             CFG.llm_backbone,
-             config=self.backbone_config,
-             quantization_config=quantization_config,
-         )

-         if CFG.lora:
-             target_modules = []
-             for name, module in self.model.named_modules():
-                 if (
-                     isinstance(module, (torch.nn.Linear, torch.nn.Conv1d))
-                     and "head" not in name
-                 ):
-                     name = name.split(".")[-1]
-                     if name not in target_modules:
-                         target_modules.append(name)
-
-             lora_config = LoraConfig(
-                 r=CFG.lora_r,
-                 lora_alpha=CFG.lora_alpha,
-                 target_modules=target_modules,
-                 lora_dropout=CFG.lora_dropout,
-                 bias="none",
-                 task_type="CAUSAL_LM",
-             )
-             if CFG.gradient_checkpointing:
-                 self.model.enable_input_require_grads()
-             self.model = get_peft_model(self.model, lora_config)
-             self.model.print_trainable_parameters()
-
-         self.classification_head = nn.Linear(
-             self.backbone_config.vocab_size, CFG.num_labels, bias=False
          )
-         self._init_weights(self.classification_head)
-
-     def _init_weights(self, module):
-         if isinstance(module, nn.Linear):
-             module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
-             if module.bias is not None:
-                 module.bias.data.zero_()
-         elif isinstance(module, nn.Embedding):
-             module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
-             if module.padding_idx is not None:
-                 module.weight.data[module.padding_idx].zero_()
-         elif isinstance(module, nn.LayerNorm):
-             module.bias.data.zero_()
-             module.weight.data.fill_(1.0)
-
-     def forward(
-         self,
-         batch
-     ):
-         # disable cache if gradient checkpointing is enabled
          if CFG.gradient_checkpointing:
-             self.model.config.use_cache = False
-
-         self.model.config.pretraining_tp = 1

-         output = self.model(
-             input_ids=batch["input_ids"],
-             attention_mask=batch["attention_mask"],
-         )

-         output.logits = self.classification_head(output[0][:, -1].float())

-         # enable cache again if gradient checkpointing is enabled
-         if CFG.gradient_checkpointing:
-             self.model.config.use_cache = True

-         return output.logits
-
- model = CustomModel()
- ### End Load the model

  def do_inference(full_text):

  )

  ### Load the model
+ class CFG:
+     num_workers = os.cpu_count()
+     llm_backbone = "HuggingFaceH4/zephyr-7b-beta"
+     tokenizer_path = "HuggingFaceH4/zephyr-7b-beta"
+     tokenizer = AutoTokenizer.from_pretrained(
+         tokenizer_path, add_prefix_space=False, use_fast=True, trust_remote_code=True, add_eos_token=True
+     )
+     batch_size = 1
+     max_len = 650
+     seed = 42

+     num_labels = 7

+     lora = True
+     lora_r = 4
+     lora_alpha = 16
+     lora_dropout = 0.05
+     lora_target_modules = ""
+     gradient_checkpointing = True


+ class CustomModel(nn.Module):
+     """
+     Model for causal language modeling problem type.
+     """

+     def __init__(self):
+         super().__init__()

+         self.backbone_config = AutoConfig.from_pretrained(
+             CFG.llm_backbone, trust_remote_code=True
+         )

+         quantization_config = BitsAndBytesConfig(
+             load_in_4bit=True,
+             bnb_4bit_compute_dtype=torch.float16,
+             bnb_4bit_quant_type="nf4",
+         )

+         self.model = AutoModelForCausalLM.from_pretrained(
+             CFG.llm_backbone,
+             config=self.backbone_config,
+             quantization_config=quantization_config,
+         )

+         if CFG.lora:
+             target_modules = []
+             for name, module in self.model.named_modules():
+                 if (
+                     isinstance(module, (torch.nn.Linear, torch.nn.Conv1d))
+                     and "head" not in name
+                 ):
+                     name = name.split(".")[-1]
+                     if name not in target_modules:
+                         target_modules.append(name)
+
+             lora_config = LoraConfig(
+                 r=CFG.lora_r,
+                 lora_alpha=CFG.lora_alpha,
+                 target_modules=target_modules,
+                 lora_dropout=CFG.lora_dropout,
+                 bias="none",
+                 task_type="CAUSAL_LM",
              )
              if CFG.gradient_checkpointing:
+                 self.model.enable_input_require_grads()
+             self.model = get_peft_model(self.model, lora_config)
+             self.model.print_trainable_parameters()

+         self.classification_head = nn.Linear(
+             self.backbone_config.vocab_size, CFG.num_labels, bias=False
+         )
+         self._init_weights(self.classification_head)
+
+     def _init_weights(self, module):
+         if isinstance(module, nn.Linear):
+             module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
+             if module.bias is not None:
+                 module.bias.data.zero_()
+         elif isinstance(module, nn.Embedding):
+             module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
+             if module.padding_idx is not None:
+                 module.weight.data[module.padding_idx].zero_()
+         elif isinstance(module, nn.LayerNorm):
+             module.bias.data.zero_()
+             module.weight.data.fill_(1.0)
+
+     def forward(
+         self,
+         batch
+     ):
+         # disable cache if gradient checkpointing is enabled
+         if CFG.gradient_checkpointing:
+             self.model.config.use_cache = False
+
+         self.model.config.pretraining_tp = 1
+
+         output = self.model(
+             input_ids=batch["input_ids"],
+             attention_mask=batch["attention_mask"],
+         )

+         output.logits = self.classification_head(output[0][:, -1].float())

+         # enable cache again if gradient checkpointing is enabled
+         if CFG.gradient_checkpointing:
+             self.model.config.use_cache = True

+         return output.logits
+
+ model = CustomModel()
+ ### End Load the model

  def do_inference(full_text):
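
Note (not part of the commit): the hunk is cut off right where do_inference(full_text) begins, so the inference path itself is not visible here. As a rough sketch of how the pieces above fit together — assuming do_inference simply tokenizes the input with CFG.tokenizer and feeds the resulting batch to the CustomModel instance — a call might look like the following; the function name, preprocessing arguments, softmax post-processing, and device placement are assumptions, not taken from app.py.

    import torch

    def sketch_inference(full_text):
        # Tokenize the raw text with the tokenizer configured in CFG (assumed preprocessing).
        inputs = CFG.tokenizer(
            full_text,
            truncation=True,
            max_length=CFG.max_len,
            return_tensors="pt",
        )
        batch = {
            "input_ids": inputs["input_ids"],
            "attention_mask": inputs["attention_mask"],
        }
        # CustomModel.forward returns logits of shape (batch_size, CFG.num_labels).
        with torch.no_grad():
            logits = model(batch)
        # Convert the 7 class logits to probabilities for display.
        return torch.softmax(logits, dim=-1).squeeze(0)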