wangjin2000 committed (verified)
Commit ed6a94b · 1 Parent(s): c57e387

Update app.py

Files changed (1): app.py (+3 −3)
app.py CHANGED

@@ -131,14 +131,14 @@ def train_function_no_sweeps(base_model_path): #, train_dataset, test_dataset)
     # Add other hyperparameters as needed
     }
     # The base model you will train a LoRA on top of
-    #base_model_path = "facebook/esm2_t12_35M_UR50D"
+    base_model_path = "facebook/esm2_t12_35M_UR50D"
 
     # Define labels and model
     #id2label = {0: "No binding site", 1: "Binding site"}
     #label2id = {v: k for k, v in id2label.items()}
 
 
-    base_model = AutoModelForTokenClassification.from_pretrained(base_model_path, num_labels=len(id2label), id2label=id2label, label2id=label2id, token=HF_TOKEN)
+    base_model = AutoModelForTokenClassification.from_pretrained(base_model_path, num_labels=len(id2label), id2label=id2label, label2id=label2id)
 
     '''
     # Load the data from pickle files (replace with your local paths)
@@ -156,7 +156,7 @@ def train_function_no_sweeps(base_model_path): #, train_dataset, test_dataset)
     '''
 
     # Tokenization
-    tokenizer = AutoTokenizer.from_pretrained(base_model_path, token=HF_TOKEN) #("facebook/esm2_t12_35M_UR50D")
+    tokenizer = AutoTokenizer.from_pretrained(base_model_path) #("facebook/esm2_t12_35M_UR50D")
     #max_sequence_length = 1000
 
     train_tokenized = tokenizer(train_sequences, padding=True, truncation=True, max_length=max_sequence_length, return_tensors="pt", is_split_into_words=False)
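For reference, the net effect of the three changed lines is sketched below: the base model path is hardcoded to the public facebook/esm2_t12_35M_UR50D checkpoint, and the token=HF_TOKEN argument is dropped from both from_pretrained calls, which works because that checkpoint requires no authentication. This is a minimal, self-contained sketch assuming the Hugging Face transformers library; the example sequences and the max_sequence_length value are illustrative placeholders, not values taken from app.py.

# Minimal sketch of the updated loading path (placeholder data, not from app.py).
from transformers import AutoModelForTokenClassification, AutoTokenizer

base_model_path = "facebook/esm2_t12_35M_UR50D"  # public checkpoint, no HF token needed

# Per-residue labels used by app.py for binding-site prediction
id2label = {0: "No binding site", 1: "Binding site"}
label2id = {v: k for k, v in id2label.items()}

# Load the ESM-2 base model for token classification (the model a LoRA is trained on top of)
base_model = AutoModelForTokenClassification.from_pretrained(
    base_model_path,
    num_labels=len(id2label),
    id2label=id2label,
    label2id=label2id,
)

# Load the matching tokenizer, also without an auth token
tokenizer = AutoTokenizer.from_pretrained(base_model_path)

# Tokenize a batch of protein sequences (placeholder sequences and length)
train_sequences = ["MKTLLILAVVAAALA", "GSHMSLFDFFKNKGSAL"]
max_sequence_length = 1000
train_tokenized = tokenizer(
    train_sequences,
    padding=True,
    truncation=True,
    max_length=max_sequence_length,
    return_tensors="pt",
    is_split_into_words=False,
)
print(train_tokenized["input_ids"].shape)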