sohomghosh
/

LIPI_FinSim4_ESG_task2

PyTorch

Model card Files Files and versions Community

sohomghosh committed on Jun 27, 2022

Commit

ed29696

•

1 Parent(s): 8a57cfb

Update README.md

Browse files

Files changed (1) hide show

README.md +7 -9

README.md CHANGED Viewed

@@ -43,13 +43,13 @@ class Triage(Dataset):
     This is a subclass of torch packages Dataset class. It processes input to create ids, masks and targets required for model training.
     """
-    def __init__(self, dataframe, tokenizer, max_len, text_col_name, category_col):
         self.len = len(dataframe)
         self.data = dataframe
         self.tokenizer = tokenizer
         self.max_len = max_len
         self.text_col_name = text_col_name
-        self.category_col = category_col
     def __getitem__(self, index):
         title = str(self.data[self.text_col_name][index])
@@ -69,14 +69,12 @@ class Triage(Dataset):
         return {
             "ids": torch.tensor(ids, dtype=torch.long),
             "mask": torch.tensor(mask, dtype=torch.long),
-            "targets": torch.tensor(
-                self.data[self.category_col][index], dtype=torch.long
-            ),
         }
     def __len__(self):
         return self.len
 class BERTClass(torch.nn.Module):
     def __init__(self, num_class):
         super(BERTClass, self).__init__()
@@ -97,7 +95,7 @@ class BERTClass(torch.nn.Module):
         output = self.classifier(pooler)
         return output
-def do_predict(tokenizer):
   test_set = Triage(test_df, tokenizer, MAX_LEN, text_col_name)
   test_params = {'batch_size' : BATCH_SIZE, 'shuffle': False, 'num_workers':0}
   test_loader = DataLoader(test_set, **test_params)
@@ -116,12 +114,12 @@ def do_predict(tokenizer):
   actual_predictions = [i[0] for i in preds.tolist()]
   return actual_predictions
-model_sus = BERTClass(2)
 model_sustain.to(device)
 model_sustain.load_state_dict(torch.load('pytorch_model.bin', map_location=device)['model_state_dict'])
 tokenizer_sus = BertTokenizer.from_pretrained('roberta-base')
-actual_predictions_sus = do_predict(tokenizer_sus)
 test_df['sustainability'] = ['sustainable' if i==0 else 'unsustainable' for i in actual_predictions_read]
 ```

     This is a subclass of torch packages Dataset class. It processes input to create ids, masks and targets required for model training.
     """
+    def __init__(self, dataframe, tokenizer, max_len, text_col_name):
         self.len = len(dataframe)
         self.data = dataframe
         self.tokenizer = tokenizer
         self.max_len = max_len
         self.text_col_name = text_col_name
     def __getitem__(self, index):
         title = str(self.data[self.text_col_name][index])
         return {
             "ids": torch.tensor(ids, dtype=torch.long),
             "mask": torch.tensor(mask, dtype=torch.long),
         }
     def __len__(self):
         return self.len
 class BERTClass(torch.nn.Module):
     def __init__(self, num_class):
         super(BERTClass, self).__init__()
         output = self.classifier(pooler)
         return output
+def do_predict(model, tokenizer):
   test_set = Triage(test_df, tokenizer, MAX_LEN, text_col_name)
   test_params = {'batch_size' : BATCH_SIZE, 'shuffle': False, 'num_workers':0}
   test_loader = DataLoader(test_set, **test_params)
   actual_predictions = [i[0] for i in preds.tolist()]
   return actual_predictions
+model_sustain = BERTClass(2)
 model_sustain.to(device)
 model_sustain.load_state_dict(torch.load('pytorch_model.bin', map_location=device)['model_state_dict'])
 tokenizer_sus = BertTokenizer.from_pretrained('roberta-base')
+actual_predictions_sus = do_predict(model_sustain, tokenizer_sus)
 test_df['sustainability'] = ['sustainable' if i==0 else 'unsustainable' for i in actual_predictions_read]
 ```