BrightBlueCheese committed
Commit 840ecbd
1 Parent(s): b0faaa6
.ipynb_checkpoints/app-checkpoint.py CHANGED
@@ -33,14 +33,17 @@ sys.path.append( '../')
 import tokenizer_sl, datamodule_finetune_sl, model_finetune_sl, chemllama_mtr, utils_sl
 import auto_evaluator_sl
 
-from torch.utils.data import Dataset, DataLoader
-from transformers import DataCollatorWithPadding
-
 torch.manual_seed(1004)
 np.random.seed(1004)
 
-# smiles_str = st.text_area('Enter SMILE string')
-smiles_str = "CC02"
+smiles_str = st.text_area('Enter SMILE string')
+file_path = './smiles_str.txt'
+
+# Open the file in write mode ('w') and write the content
+with open(file_path, 'w') as file:
+    file.write(smiles_str)
+
+# smiles_str = "CC02"
 
 ###
 # solute_or_solvent = 'solute'
@@ -81,23 +84,23 @@ dir_model_ft_to_save = f"{dir_main}/SolLlama-mtr"
 # name_model_ft = 'Solvent.pt'
 name_model_ft = f"{solute_or_solvent}.pt"
 
-# # Load dataset for finetune
-# batch_size_for_train = batch_size_pair[0]
-# batch_size_for_valid = batch_size_pair[1]
-
-# data_module = datamodule_finetune_sl.CustomFinetuneDataModule(
-#     solute_or_solvent=solute_or_solvent,
-#     tokenizer=tokenizer,
-#     max_seq_length=max_length,
-#     batch_size_train=batch_size_for_train,
-#     batch_size_valid=batch_size_for_valid,
-#     # num_device=int(config.NUM_DEVICE) * config.NUM_WORKERS_MULTIPLIER,
-#     num_device=num_workers,
-# )
+# Load dataset for finetune
+batch_size_for_train = batch_size_pair[0]
+batch_size_for_valid = batch_size_pair[1]
+
+data_module = datamodule_finetune_sl.CustomFinetuneDataModule(
+    solute_or_solvent=solute_or_solvent,
+    tokenizer=tokenizer,
+    max_seq_length=max_length,
+    batch_size_train=batch_size_for_train,
+    batch_size_valid=batch_size_for_valid,
+    # num_device=int(config.NUM_DEVICE) * config.NUM_WORKERS_MULTIPLIER,
+    num_device=num_workers,
+)
 
-# data_module.prepare_data(smiles_str=smiles_str)
-# data_module.setup()
-# steps_per_epoch = len(data_module.test_dataloader())
+data_module.prepare_data()
+data_module.setup()
+steps_per_epoch = len(data_module.test_dataloader())
 
 # Load model and optimizer for finetune
 learning_rate = lr
@@ -108,8 +111,7 @@ model_mtr = chemllama_mtr.ChemLlama.load_from_checkpoint(dir_model_mtr)
 
 model_ft = model_finetune_sl.CustomFinetuneModel(
     model_mtr=model_mtr,
-    # steps_per_epoch=steps_per_epoch,
-    steps_per_epoch=1,
+    steps_per_epoch=steps_per_epoch,
    warmup_epochs=1,
    max_epochs=epochs,
    learning_rate=learning_rate,
@@ -141,17 +143,7 @@ local_model_ft = utils_sl.load_model_ft_with(
     name_model_ft=name_model_ft
 )
 
-data_loader = DataLoader(
-    dataset=datamodule_finetune_sl.CustomLlamaDatasetAbraham(
-        smiles_str, tokenizer, max_seq_length,
-    ),
-    batch_size=1,
-    num_workers=1,
-    collate_fn=DataCollatorWithPadding(tokenizer),
-    shuffle=False,
-)
-
-result = trainer.predict(local_model_ft, data_loader=data_loader)
+result = trainer.predict(local_model_ft, data_module)
 result_pred = list()
 result_label = list()
 for bat in range(len(result)):
.ipynb_checkpoints/datamodule_finetune_sl-checkpoint.py CHANGED
@@ -59,8 +59,10 @@ class CustomFinetuneDataModule(L.LightningDataModule):
         self.num_device = num_device
 
 
-    def prepare_data(self, smiles_str:str):
+    def prepare_data(self):
         # self.list_df = load_abraham(self.solute_or_solvent)
+        with open('./smiles_str.txt', 'r') as file:
+            smiles_str = file.readline()
         self.smiles_str = smiles_str
 
     def setup(self, stage=None):
app.py CHANGED
@@ -33,14 +33,17 @@ sys.path.append( '../')
 import tokenizer_sl, datamodule_finetune_sl, model_finetune_sl, chemllama_mtr, utils_sl
 import auto_evaluator_sl
 
-from torch.utils.data import Dataset, DataLoader
-from transformers import DataCollatorWithPadding
-
 torch.manual_seed(1004)
 np.random.seed(1004)
 
-# smiles_str = st.text_area('Enter SMILE string')
-smiles_str = "CC02"
+smiles_str = st.text_area('Enter SMILE string')
+file_path = './smiles_str.txt'
+
+# Open the file in write mode ('w') and write the content
+with open(file_path, 'w') as file:
+    file.write(smiles_str)
+
+# smiles_str = "CC02"
 
 ###
 # solute_or_solvent = 'solute'
@@ -81,23 +84,23 @@ dir_model_ft_to_save = f"{dir_main}/SolLlama-mtr"
 # name_model_ft = 'Solvent.pt'
 name_model_ft = f"{solute_or_solvent}.pt"
 
-# # Load dataset for finetune
-# batch_size_for_train = batch_size_pair[0]
-# batch_size_for_valid = batch_size_pair[1]
-
-# data_module = datamodule_finetune_sl.CustomFinetuneDataModule(
-#     solute_or_solvent=solute_or_solvent,
-#     tokenizer=tokenizer,
-#     max_seq_length=max_length,
-#     batch_size_train=batch_size_for_train,
-#     batch_size_valid=batch_size_for_valid,
-#     # num_device=int(config.NUM_DEVICE) * config.NUM_WORKERS_MULTIPLIER,
-#     num_device=num_workers,
-# )
+# Load dataset for finetune
+batch_size_for_train = batch_size_pair[0]
+batch_size_for_valid = batch_size_pair[1]
+
+data_module = datamodule_finetune_sl.CustomFinetuneDataModule(
+    solute_or_solvent=solute_or_solvent,
+    tokenizer=tokenizer,
+    max_seq_length=max_length,
+    batch_size_train=batch_size_for_train,
+    batch_size_valid=batch_size_for_valid,
+    # num_device=int(config.NUM_DEVICE) * config.NUM_WORKERS_MULTIPLIER,
+    num_device=num_workers,
+)
 
-# data_module.prepare_data(smiles_str=smiles_str)
-# data_module.setup()
-# steps_per_epoch = len(data_module.test_dataloader())
+data_module.prepare_data()
+data_module.setup()
+steps_per_epoch = len(data_module.test_dataloader())
 
 # Load model and optimizer for finetune
 learning_rate = lr
@@ -108,8 +111,7 @@ model_mtr = chemllama_mtr.ChemLlama.load_from_checkpoint(dir_model_mtr)
 
 model_ft = model_finetune_sl.CustomFinetuneModel(
     model_mtr=model_mtr,
-    # steps_per_epoch=steps_per_epoch,
-    steps_per_epoch=1,
+    steps_per_epoch=steps_per_epoch,
    warmup_epochs=1,
    max_epochs=epochs,
    learning_rate=learning_rate,
@@ -141,17 +143,7 @@ local_model_ft = utils_sl.load_model_ft_with(
     name_model_ft=name_model_ft
 )
 
-data_loader = DataLoader(
-    dataset=datamodule_finetune_sl.CustomLlamaDatasetAbraham(
-        smiles_str, tokenizer, max_seq_length,
-    ),
-    batch_size=1,
-    num_workers=1,
-    collate_fn=DataCollatorWithPadding(tokenizer),
-    shuffle=False,
-)
-
-result = trainer.predict(local_model_ft, data_loader=data_loader)
+result = trainer.predict(local_model_ft, data_module)
 result_pred = list()
 result_label = list()
 for bat in range(len(result)):
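Note: the net effect of the app.py changes is a file-based handoff. The Streamlit text_area value is persisted to ./smiles_str.txt and re-read inside the datamodule's prepare_data(), because Lightning's Trainer invokes prepare_data() without custom arguments. A minimal sketch of that round-trip, with DemoDataModule and save_input as hypothetical stand-ins for the project's code:

    import lightning as L

    FILE_PATH = './smiles_str.txt'

    def save_input(smiles_str: str) -> None:
        # app.py side: persist the text_area value to disk
        with open(FILE_PATH, 'w') as file:
            file.write(smiles_str)

    class DemoDataModule(L.LightningDataModule):
        def prepare_data(self):
            # datamodule side: recover the string without needing an argument
            with open(FILE_PATH, 'r') as file:
                self.smiles_str = file.readline()

    save_input('CCO')     # e.g. ethanol
    dm = DemoDataModule()
    dm.prepare_data()
    print(dm.smiles_str)  # -> 'CCO'

Handing data_module straight to trainer.predict also assumes CustomFinetuneDataModule exposes a predict_dataloader() hook, since that is where Lightning sources prediction batches when given a datamodule.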
datamodule_finetune_sl.py CHANGED
@@ -59,8 +59,10 @@ class CustomFinetuneDataModule(L.LightningDataModule):
         self.num_device = num_device
 
 
-    def prepare_data(self, smiles_str:str):
+    def prepare_data(self):
         # self.list_df = load_abraham(self.solute_or_solvent)
+        with open('./smiles_str.txt', 'r') as file:
+            smiles_str = file.readline()
         self.smiles_str = smiles_str
 
     def setup(self, stage=None):
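Note: file.readline() returns the line including any trailing newline, so a stripped read is slightly more robust if the file ever ends with one. A hedged variant of the new hook, not what the commit ships:

    def prepare_data(self):
        # self.list_df = load_abraham(self.solute_or_solvent)
        with open('./smiles_str.txt', 'r') as file:
            # strip the trailing newline that readline() would otherwise keep
            self.smiles_str = file.readline().strip()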
model_finetune_sl.py CHANGED
@@ -113,7 +113,7 @@ class CustomFinetuneModel(L.LightningModule):
 
         return loss, logits, labels
 
-    def predict_step(self, batch, batch_idx, dataloader_idx=0):
+    def predict_step(self, batch, batch_idx):
         loss, logits, labels = self._common_step(batch=batch, batch_idx=batch_idx)
 
         return logits, labels
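Note: Lightning accepts a predict_step override with or without the dataloader_idx parameter, so dropping it is safe while a single prediction dataloader is in play. A self-contained sketch of how trainer.predict drives this hook, with TinyModel as an illustrative stand-in for CustomFinetuneModel:

    import torch
    import lightning as L

    class TinyModel(L.LightningModule):
        def __init__(self):
            super().__init__()
            self.layer = torch.nn.Linear(4, 1)

        def predict_step(self, batch, batch_idx):
            # called once per batch by trainer.predict(); the return values
            # are collected into the list that the result_pred loop consumes
            return self.layer(batch)

    loader = torch.utils.data.DataLoader(torch.randn(8, 4), batch_size=4)
    result = L.Trainer(logger=False).predict(TinyModel(), loader)
    print(len(result))  # one entry per predict batch, here 2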