Spaces:

Jacaranda
/

Facility_Batch_Predict

Runtime error

App Files Files Community

Jacaranda committed on Jun 6, 2023

Commit

6de6e27

•

1 Parent(s): c8f83c0

Upload facility_predict.py

Browse files

Files changed (1) hide show

facility_predict.py +26 -9

facility_predict.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import os
 import random
-import gradio as gr
 import json
 import numpy as np
 import torch
@@ -13,7 +13,8 @@ from torch.utils.data import TensorDataset, DataLoader
 class Preprocess:
     def __init__(self, tokenizer_vocab_path, tokenizer_max_len):
-        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_vocab_path, use_auth_token='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')
         self.max_len = tokenizer_max_len
     def clean_text(self, text):
@@ -59,13 +60,15 @@ class Preprocess:
         data = TensorDataset(input_ids, attention_mask)
         return data
 class Facility_Model:
     def __init__(self, facility_model_path: any,
                  max_len: int):
         self.max_len = max_len
         self.softmax = torch.nn.Softmax(dim=1)
         self.gpu = False
-        self.model = AutoModelForSequenceClassification.from_pretrained(facility_model_path, use_auth_token='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')
         self.model.eval()  # set pytorch model for inference mode
         if torch.cuda.device_count() > 1:
@@ -105,9 +108,10 @@ class Facility_Model:
         """
         output_dict = {}
         # transform the relation table(between label and intent)
-        path_table = pd.read_csv('dhis_label_relation_14357.csv')
-        label_intent_dict = path_table[["label", "corresponding_label"]].set_index("corresponding_label").to_dict()['label']
         # transform the output into dictionary(between intent and probability)
         for intent in range(pred.shape[1]):
@@ -157,17 +161,30 @@ def predict_batch_from_csv(input_file, output_file):
     # Initialize predictions list
     predictions = []
     # Iterate over rows with tqdm for progress tracking
     for _, row in tqdm(batch_data.iterrows(), total=len(batch_data)):
-        text = row['facility_name']  # Replace 'facility_name' with the actual column name containing the text data
-        cleaned_text = processor.clean_text(text)
         prepared_data = processor.process_tokenizer(cleaned_text)
-        prediction = obj_Facility_Model.inference(prepared_data)
         predictions.append(prediction)
     # Create DataFrame for predictions
     output_data = pd.DataFrame({'prediction': predictions})
     # Merge with input DataFrame
-    pred_output_df = pd.concat([batch_data, output_data], axis=1)
     # Save predictions to CSV
     pred_output_df.to_csv(output_file, index=False)

+# writefile facility_predict.py
 import os
 import random
 import json
 import numpy as np
 import torch
 class Preprocess:
     def __init__(self, tokenizer_vocab_path, tokenizer_max_len):
+        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_vocab_path,
+                                                       use_auth_token='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')
         self.max_len = tokenizer_max_len
     def clean_text(self, text):
         data = TensorDataset(input_ids, attention_mask)
         return data
 class Facility_Model:
     def __init__(self, facility_model_path: any,
                  max_len: int):
         self.max_len = max_len
         self.softmax = torch.nn.Softmax(dim=1)
         self.gpu = False
+        self.model = AutoModelForSequenceClassification.from_pretrained(facility_model_path,
+                                                                        use_auth_token='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')
         self.model.eval()  # set pytorch model for inference mode
         if torch.cuda.device_count() > 1:
         """
         output_dict = {}
         # transform the relation table(between label and intent)
+        path_table = pd.read_csv('/content/drive/MyDrive/dhis14000/dhis_label_relation_14357.csv')
+        label_intent_dict = path_table[["label", "corresponding_label"]].set_index("corresponding_label").to_dict()[
+            'label']
         # transform the output into dictionary(between intent and probability)
         for intent in range(pred.shape[1]):
     # Initialize predictions list
     predictions = []
+    # Iterate over rows with tqdm for progress tracking
     # Iterate over rows with tqdm for progress tracking
     for _, row in tqdm(batch_data.iterrows(), total=len(batch_data)):
+        text = row['pnc_fac_name']  # Column holding the facility-name text; replace 'pnc_fac_name' if the input CSV uses a different column name
+        if pd.isnull(text):
+            cleaned_text = ""
+        else:
+            cleaned_text = processor.clean_text(text)
         prepared_data = processor.process_tokenizer(cleaned_text)
+        if cleaned_text == "":
+            prediction = ""  # Set prediction as empty string
+        else:
+            prediction = obj_Facility_Model.inference(prepared_data)
         predictions.append(prediction)
     # Create DataFrame for predictions
     output_data = pd.DataFrame({'prediction': predictions})
     # Merge with input DataFrame
+    pred_output_df = pd.concat([batch_data.reset_index(drop=True), output_data], axis=1)
     # Save predictions to CSV
     pred_output_df.to_csv(output_file, index=False)