from typing import List
from transformers import pipeline, AutoTokenizer
import os


class Detector:
    """Detects bias in text using pre-trained Hugging Face models.

    Supports token-level (NER-style) and sentence-level (text-classification)
    detection across several bias dimensions (race, gender, profession,
    religion, or all combined).
    """

    def __init__(self, classifier, model_type):
        """Initialize the detector and load the matching pipeline.

        Args:
            classifier (str): Detection granularity, "Token" or "Sentence".
            model_type (str): Bias dimension, one of "All", "Race",
                "Gender", "Profession", "Religion".

        Raises:
            ValueError: If ``classifier`` or ``model_type`` is not a
                supported key of the model mapping.
        """
        self.classifier = classifier
        self.model_type = model_type
        # Maps (classifier granularity, bias dimension) -> HF Hub model id.
        self.classifier_model_mapping = {
            "Token": {
                "All": "wu981526092/Token-Level-Multidimensional-Bias-Detector",
                "Race": "wu981526092/Token-Level-Race-Bias-Detector",
                "Gender": "wu981526092/Token-Level-Gender-Bias-Detector",
                "Profession": "wu981526092/Token-Level-Profession-Bias-Detector",
                "Religion": "wu981526092/Token-Level-Religion-Bias-Detector",
            },
            "Sentence": {
                "All": "wu981526092/Sentence-Level-Multidimensional-Bias-Detector",
                "Religion": "wu981526092/Sentence-Level-Religion-Bias-Detector",
                "Profession": "wu981526092/Sentence-Level-Profession-Bias-Detector",
                "Race": "wu981526092/Sentence-Level-Race-Bias-Detector",
                "Gender": "wu981526092/Sentence-Level-Gender-Bias-Detector",
            },
        }

        if classifier not in self.classifier_model_mapping:
            raise ValueError(
                f"Invalid classifier. Expected one of: "
                f"{list(self.classifier_model_mapping.keys())}"
            )

        if model_type not in self.classifier_model_mapping[classifier]:
            raise ValueError(
                f"Invalid model_type for {classifier}. Expected one of: "
                f"{list(self.classifier_model_mapping[classifier].keys())}"
            )

        self.model_path = self.classifier_model_mapping[classifier][model_type]

        # SECURITY: the token must come from the environment. A credential
        # hard-coded in source is a leaked secret and must be revoked.
        # May be None for public models, in which case no auth is sent.
        api_token = os.getenv("BIAS_DETECTOR_API_KEY")

        # Token-level detection is an NER task; sentence-level is text
        # classification returning scores for every label (top_k=None is
        # the non-deprecated equivalent of return_all_scores=True).
        if classifier == "Token":
            self.model = pipeline(
                "ner",
                model=self.model_path,
                tokenizer=self.model_path,
                token=api_token,
            )
        else:
            self.model = pipeline(
                "text-classification",
                model=self.model_path,
                tokenizer=self.model_path,
                token=api_token,
                top_k=None,
            )

    def predict(self, texts: List[str]):
        """Predict the bias of each text in ``texts``.

        Args:
            texts (List[str]): Strings to analyze.

        Returns:
            list[dict]: One dict per input text, keyed by the text itself.
                For "Token", the value maps each detected word to
                ``{entity_label: score}``; for "Sentence", it maps each
                label (prefix before '__' stripped) to its score.

        Raises:
            ValueError: If any element of ``texts`` is not a string.
        """
        if not all(isinstance(text, str) for text in texts):
            raise ValueError("All elements in 'texts' should be of str type")

        results = []
        predictions = self.model(texts)
        for text, prediction in zip(texts, predictions):
            result = {}
            if self.classifier == "Token":
                # NOTE: duplicate words in one text overwrite earlier
                # entries — only the last occurrence's entity/score is kept.
                for item in prediction:
                    result[item["word"]] = {item["entity"]: item["score"]}
            elif self.classifier == "Sentence":
                # Labels arrive as e.g. "__label__gender"; keep the suffix.
                result = {
                    item["label"].split("__")[-1]: item["score"]
                    for item in prediction
                }
            results.append({text: result})

        return results


if __name__ == "__main__":
    detector_test = Detector("Sentence", "Gender")
    test_data = [
        "The girl performed poorly at reading in school.",
        "Sir is an address showing respect for a person. It usually refers to a male.",
    ]
    result = detector_test.predict(test_data)
    print(result)
    print(result[1][test_data[1]])