Spaces:

Jacaranda
/

Facility_Predict

Runtime error

App Files Files Community

Jacaranda committed on Jun 7, 2023

Commit

cdda6e7

1 Parent(s): 424ae24

Delete app

Browse files

Files changed (3) hide show

app/__init__.py +0 -0
app/app.py +0 -23
app/predict.py +0 -154

app/__init__.py DELETED Viewed

File without changes

app/app.py DELETED Viewed

@@ -1,23 +0,0 @@
-import gradio as gr
-import pandas as pd
-import json
-from tqdm import tqdm
-from predict import Preprocess, Facility_Model, obj_Facility_Model, processor
-def predict_facility(data):
-    pred_data = processor.process_tokenizer(data)
-    predictions = obj_Facility_Model.inference(pred_data)
-    return json.loads(predictions)
-iface = gr.Interface(
-    fn=predict_facility,
-    inputs="text",
-    outputs="json",
-    title=" Single Facility Prediction",
-    description="Predict the facility based on input data.",
-    #examples=[["kilifi"], ["mombasa"], ["nairobi"]],
-)
-if __name__ == "__main__":
-    iface.launch()

app/predict.py DELETED Viewed

@@ -1,154 +0,0 @@
-import os
-import random
-import json
-import numpy as np
-import torch
-import heapq
-import pandas as pd
-from tqdm import tqdm
-from transformers import AutoModelForSequenceClassification, AutoTokenizer
-from torch.utils.data import TensorDataset, DataLoader
-class Preprocess:
-    def __init__(self, tokenizer_vocab_path, tokenizer_max_len):
-        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_vocab_path,
-                                                       use_auth_token='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')
-        self.max_len = tokenizer_max_len
-    def clean_text(self, text):
-        text = text.lower()
-        stopwords = ["i", "was", "transferred",
-                     "from", "to", "nilienda", "kituo",
-                     "cha", "lakini", "saa", "hii", "niko",
-                     "at", "nilienda", "nikahudumiwa", "pole",
-                     "deliver", "na", "ni", "baada", "ya",
-                     "kutumwa", "kutoka", "nilienda",
-                     "ndipo", "nikapewa", "hiyo", "lindam ama", "nikawa",
-                     "mgonjwa", "nikatibiwa", "in", "had", "a",
-                     "visit", "gynaecologist", "ndio",
-                     "karibu", "mimi", "niko", "sehemu", "hospitali",
-                     "serikali", "delivered", "katika", "kaunti", "kujifungua",
-                     "katika", "huko", "nilipoenda", "kwa", "bado", "naedelea",
-                     "sija", "maliza", "mwisho",
-                     "nilianza", "kliniki", "yangu",
-                     "nilianzia", "nilijifungua"]
-        text_single = ' '.join(word for word in text.split() if word not in stopwords)
-        return text_single
-    def encode_fn(self, text_single):
-        """
-        Using tokenizer to preprocess the text
-        example of text_single:'Nairobi Hospital'
-        """
-        tokenizer = self.tokenizer(text_single,
-                                   padding=True,
-                                   truncation=True,
-                                   max_length=self.max_len,
-                                   return_tensors='pt'
-                                   )
-        input_ids = tokenizer['input_ids']
-        attention_mask = tokenizer['attention_mask']
-        return input_ids, attention_mask
-    def process_tokenizer(self, text_single):
-        """
-        Preprocess text and prepare dataloader for a single new sentence
-        """
-        input_ids, attention_mask = self.encode_fn(text_single)
-        data = TensorDataset(input_ids, attention_mask)
-        return data
-class Facility_Model:
-    def __init__(self, facility_model_path: any,
-                 max_len: int):
-        self.max_len = max_len
-        self.softmax = torch.nn.Softmax(dim=1)
-        self.gpu = False
-        self.model = AutoModelForSequenceClassification.from_pretrained(facility_model_path,
-                                                                        use_auth_token='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')
-        self.model.eval()  # set pytorch model for inference mode
-        if torch.cuda.device_count() > 1:
-            self.model = torch.nn.DataParallel(self.model)
-        if self.gpu:
-            seed = 42
-            random.seed(seed)
-            np.random.seed(seed)
-            torch.manual_seed(seed)
-            torch.cuda.manual_seed_all(seed)
-            torch.backends.cudnn.deterministic = True
-            self.device = torch.device('cuda')
-        else:
-            self.device = 'cpu'
-        self.model = self.model.to(self.device)
-    def predict_single(self, model, pred_data):
-        """
-        Model inference for new single sentence
-        """
-        pred_dataloader = DataLoader(pred_data, batch_size=10, shuffle=False)
-        for i, batch in enumerate(pred_dataloader):
-            with torch.no_grad():
-                outputs = model(input_ids=batch[0].to(self.device),
-                                attention_mask=batch[1].to(self.device)
-                                )
-                loss, logits = outputs.loss, outputs.logits
-                probability = self.softmax(logits)
-                probability_list = probability.detach().cpu().numpy()
-        return probability_list
-    def output_intent_probability(self, pred: any) -> dict:
-        """
-        Convert the model output into a dictionary of all intents and their probabilities
-        """
-        output_dict = {}
-        # transform the relation table(between label and intent)
-        path_table = pd.read_csv('/content/drive/MyDrive/dhis14000/dhis_label_relation_14357.csv')
-        label_intent_dict = path_table[["label", "corresponding_label"]].set_index("corresponding_label").to_dict()[
-            'label']
-        # transform the output into dictionary(between intent and probability)
-        for intent in range(pred.shape[1]):
-            output_dict[label_intent_dict[intent]] = pred[0][intent]
-        return output_dict
-    def inference(self, prepared_data):
-        """
-        Make predictions on one new sentence and output a JSON format variable
-        """
-        temp = []
-        prob_distribution = self.predict_single(self.model, prepared_data)
-        prediction_results = self.output_intent_probability(prob_distribution.astype(float))
-        # Filter out predictions containing "dental", "optical" or "eye" keywords
-        filtered_results = {intent: prob for intent, prob in prediction_results.items()
-                            if
-                            "dental" not in intent.lower() and "optical" not in intent.lower() and "eye" not in intent.lower()}
-        sorted_pred_intent_results = sorted(filtered_results.items(), key=lambda x: x[1], reverse=True)
-        sorted_pred_intent_results_dict = dict(sorted_pred_intent_results)
-        # Return the top four results
-        top_results = dict(list(sorted_pred_intent_results)[:4])
-        temp.append(top_results)
-        final_preds = json.dumps(temp)
-        #final_preds = ', '.join(top_results.keys())
-        #final_preds = ', '.join(top_results)
-       # final_preds = final_preds.replace("'", "")
-        return final_preds
-jacaranda_hugging_face_model = "Jacaranda/dhis_14000_600k_Test_Model"
-obj_Facility_Model = Facility_Model(facility_model_path=jacaranda_hugging_face_model,
-                                    max_len=128
-                                    )
-processor = Preprocess(tokenizer_vocab_path=jacaranda_hugging_face_model,
-                       tokenizer_max_len=128
-                       )