rashmi committed on
Commit 294768e
1 Parent(s): 1924eb4
Files changed (1)
  1. app.py +101 -97
app.py CHANGED
@@ -41,118 +41,123 @@ theme = gr.themes.Monochrome(
    font=[gr.themes.GoogleFont("Open Sans"), "ui-sans-serif", "system-ui", "sans-serif"],
)

- ### Load the model
- class CFG:
-     num_workers = os.cpu_count()
-     llm_backbone = "HuggingFaceH4/zephyr-7b-beta"
-     tokenizer_path = "HuggingFaceH4/zephyr-7b-beta"
-     tokenizer = AutoTokenizer.from_pretrained(
-         tokenizer_path, add_prefix_space=False, use_fast=True, trust_remote_code=True, add_eos_token=True
-     )
-     batch_size = 1
-     max_len = 650
-     seed = 42
-
-     num_labels = 7
-
-     lora = True
-     lora_r = 4
-     lora_alpha = 16
-     lora_dropout = 0.05
-     lora_target_modules = ""
-     gradient_checkpointing = True
-
-
- class CustomModel(nn.Module):
-     """
-     Model for causal language modeling problem type.
-     """
-
-     def __init__(self):
-         super().__init__()
-
-         self.backbone_config = AutoConfig.from_pretrained(
-             CFG.llm_backbone, trust_remote_code=True
-         )
-
-         quantization_config = BitsAndBytesConfig(
-             load_in_4bit=True,
-             bnb_4bit_compute_dtype=torch.float16,
-             bnb_4bit_quant_type="nf4",
-         )
-
-         self.model = AutoModelForCausalLM.from_pretrained(
-             CFG.llm_backbone,
-             config=self.backbone_config,
-             quantization_config=quantization_config,
-         )
-
-         if CFG.lora:
-             target_modules = []
-             for name, module in self.model.named_modules():
-                 if (
-                     isinstance(module, (torch.nn.Linear, torch.nn.Conv1d))
-                     and "head" not in name
-                 ):
-                     name = name.split(".")[-1]
-                     if name not in target_modules:
-                         target_modules.append(name)
-
-             lora_config = LoraConfig(
-                 r=CFG.lora_r,
-                 lora_alpha=CFG.lora_alpha,
-                 target_modules=target_modules,
-                 lora_dropout=CFG.lora_dropout,
-                 bias="none",
-                 task_type="CAUSAL_LM",
-             )
-             if CFG.gradient_checkpointing:
-                 self.model.enable_input_require_grads()
-             self.model = get_peft_model(self.model, lora_config)
-             self.model.print_trainable_parameters()
-
-         self.classification_head = nn.Linear(
-             self.backbone_config.vocab_size, CFG.num_labels, bias=False
-         )
-         self._init_weights(self.classification_head)
-
-     def _init_weights(self, module):
-         if isinstance(module, nn.Linear):
-             module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
-             if module.bias is not None:
-                 module.bias.data.zero_()
-         elif isinstance(module, nn.Embedding):
-             module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
-             if module.padding_idx is not None:
-                 module.weight.data[module.padding_idx].zero_()
-         elif isinstance(module, nn.LayerNorm):
-             module.bias.data.zero_()
-             module.weight.data.fill_(1.0)
-
-     def forward(
-         self,
-         batch
-     ):
-         # disable cache if gradient checkpointing is enabled
-         if CFG.gradient_checkpointing:
-             self.model.config.use_cache = False
-
-         self.model.config.pretraining_tp = 1
-
-         output = self.model(
-             input_ids=batch["input_ids"],
-             attention_mask=batch["attention_mask"],
-         )
-
-         output.logits = self.classification_head(output[0][:, -1].float())
-
-         # enable cache again if gradient checkpointing is enabled
-         if CFG.gradient_checkpointing:
-             self.model.config.use_cache = True
-
-         return output.logits
+ def do_inference(full_text):
+     ### Load the model
+     class CFG:
+         num_workers = os.cpu_count()
+         llm_backbone = "HuggingFaceH4/zephyr-7b-beta"
+         tokenizer_path = "HuggingFaceH4/zephyr-7b-beta"
+         tokenizer = AutoTokenizer.from_pretrained(
+             tokenizer_path, add_prefix_space=False, use_fast=True, trust_remote_code=True, add_eos_token=True
+         )
+         batch_size = 1
+         max_len = 650
+         seed = 42
+
+         num_labels = 7
+
+         lora = True
+         lora_r = 4
+         lora_alpha = 16
+         lora_dropout = 0.05
+         lora_target_modules = ""
+         gradient_checkpointing = True
+
+
+     class CustomModel(nn.Module):
+         """
+         Model for causal language modeling problem type.
+         """
+
+         def __init__(self):
+             super().__init__()
+
+             self.backbone_config = AutoConfig.from_pretrained(
+                 CFG.llm_backbone, trust_remote_code=True
+             )
+
+             quantization_config = BitsAndBytesConfig(
+                 load_in_4bit=True,
+                 bnb_4bit_compute_dtype=torch.float16,
+                 bnb_4bit_quant_type="nf4",
+             )
+
+             self.model = AutoModelForCausalLM.from_pretrained(
+                 CFG.llm_backbone,
+                 config=self.backbone_config,
+                 quantization_config=quantization_config,
+             )
+
+             if CFG.lora:
+                 target_modules = []
+                 for name, module in self.model.named_modules():
+                     if (
+                         isinstance(module, (torch.nn.Linear, torch.nn.Conv1d))
+                         and "head" not in name
+                     ):
+                         name = name.split(".")[-1]
+                         if name not in target_modules:
+                             target_modules.append(name)
+
+                 lora_config = LoraConfig(
+                     r=CFG.lora_r,
+                     lora_alpha=CFG.lora_alpha,
+                     target_modules=target_modules,
+                     lora_dropout=CFG.lora_dropout,
+                     bias="none",
+                     task_type="CAUSAL_LM",
+                 )
+                 if CFG.gradient_checkpointing:
+                     self.model.enable_input_require_grads()
+                 self.model = get_peft_model(self.model, lora_config)
+                 self.model.print_trainable_parameters()
+
+             self.classification_head = nn.Linear(
+                 self.backbone_config.vocab_size, CFG.num_labels, bias=False
+             )
+             self._init_weights(self.classification_head)
+
+         def _init_weights(self, module):
+             if isinstance(module, nn.Linear):
+                 module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
+                 if module.bias is not None:
+                     module.bias.data.zero_()
+             elif isinstance(module, nn.Embedding):
+                 module.weight.data.normal_(mean=0.0, std=self.backbone_config.initializer_range)
+                 if module.padding_idx is not None:
+                     module.weight.data[module.padding_idx].zero_()
+             elif isinstance(module, nn.LayerNorm):
+                 module.bias.data.zero_()
+                 module.weight.data.fill_(1.0)
+
+         def forward(
+             self,
+             batch
+         ):
+             # disable cache if gradient checkpointing is enabled
+             if CFG.gradient_checkpointing:
+                 self.model.config.use_cache = False
+
+             self.model.config.pretraining_tp = 1
+
+             output = self.model(
+                 input_ids=batch["input_ids"],
+                 attention_mask=batch["attention_mask"],
+             )
+
+             output.logits = self.classification_head(output[0][:, -1].float())
+
+             # enable cache again if gradient checkpointing is enabled
+             if CFG.gradient_checkpointing:
+                 self.model.config.use_cache = True
+
+             return output.logits
+
+     model = CustomModel()
+
+     return "result"
+

### End Load the model


@@ -160,7 +165,7 @@ class CustomModel(nn.Module):

def do_submit(question, response):
    full_text = question + " " + response
-     # result = do_inference(full_text)
+     result = do_inference(full_text)
    return "result"

@spaces.GPU
@@ -168,7 +173,6 @@ def greet():
    pass

with gr.Blocks(title=title) as demo: # theme=theme
-     model = CustomModel()
    sample_examples = pd.read_csv('sample_examples.csv')
    example_list = sample_examples[['Question','Response','target']].sample(2).values.tolist()
    gr.Markdown(f"## {title}")
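
Note that in this commit do_inference still returns the placeholder string "result"; the tokenizer and CustomModel it constructs are not yet applied to full_text. The lines below are a minimal sketch, not part of the commit, of how those pieces could be wired together to produce a class prediction. The helper name run_inference, the device handling, and the argmax-over-logits decoding are assumptions for illustration only.

import torch

def run_inference(full_text, model, tokenizer, max_len=650):
    # Tokenize the concatenated question + response (same tokenizer as CFG.tokenizer assumed).
    inputs = tokenizer(
        full_text,
        truncation=True,
        max_length=max_len,
        return_tensors="pt",
    )
    # Move tensors to the model's device (the 4-bit backbone typically lives on GPU).
    device = next(model.parameters()).device
    batch = {
        "input_ids": inputs["input_ids"].to(device),
        "attention_mask": inputs["attention_mask"].to(device),
    }
    model.eval()
    with torch.no_grad():
        # CustomModel.forward returns logits of shape (batch_size, CFG.num_labels).
        logits = model(batch)
    # Decode to a single class index; mapping indices back to target names is an assumption.
    return int(logits.argmax(dim=-1).item())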