Spaces:

xiomarablanco
/

plentas

Runtime error

File size: 8,681 Bytes

import pandas as pd
import json

from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import load_json, create_file_path

class GetSettings():
    """
    This class stores the selected settings for the current experiment.

    On construction it reads the configuration document and then loads the
    exam data (statement, keywords, sub-questions and reference sub-answers)
    either from the api payload or from the input path named in the config.
    """
    def __init__(self, config_settings, studentsData):
        """
        Inputs:
            -config_settings: the loaded config json, indexed by key
            -studentsData: indexable object; element 0 is the students' answers
             (None to read them from the config path instead), element 1 is the
             exam metadata used when element 0 is given
        """

        #extracting the settings from the configuration document
        self.__getConfigSettings(config_settings)

        #getting the responses to study
        self.__getDatatoStudy(studentsData)

    def __getDatatoStudy(self, data):
        """
        This method selects the source of the data to study
        Inputs:
            -data: same object received as studentsData in the constructor
        """
        #identity check instead of "== None": objects that override __eq__
        #(e.g. a DataFrame) do not reduce that comparison to a plain bool
        if data[0] is None:
            #extracting the info from the path in the config json
            self.__getData(self.json_file_in)
        else:
            #extracting the info from the selected file in the api
            self.__getApiData(data)

    def setApiSettings(self, api_settings):
        """
        This function is to overwrite the parameters with the selected values from the api
        Inputs:
            -api_settings: dictionary with the stored parameters from the api
        """
        self.PesoOrtografia = api_settings["ortographyPercentage"]
        self.PesoSintaxis = api_settings["syntaxPercentage"]
        self.PesoSemantics = api_settings["semanticPercentage"]
        self.rango_ID = api_settings["students"]

    def __getConfigSettings(self, df):
        """
        This method is used to import the settings from the config json
        Inputs:
            -df: the loaded config json data (accessed by nested keys)
        """

        #+++ General settings +++

        #path where the dataset is stored
        self.json_file_in = df["ruta_fichero_entrada"]
        #path where output is to be stored
        self.json_file_out = df["ruta_fichero_salida"]

        #path to access hunspell components
        self.hunspell_aff = df["ruta_hunspell"]["aff"]
        self.hunspell_dic = df["ruta_hunspell"]["dic"]

        analysis = df["Parametros_Analisis"]

        #range of students to study  ---- Will be overwritten from api
        if analysis["estudiantes"]["Todos"]:
            self.rango_ID = "All"
        else:
            self.rango_ID = analysis["estudiantes"]["ID_rango"]

        #sentence grouping limits; the stored maximum is one past the configured value
        grouping = analysis["Semantica"]["frases"]["Agrupacion"]
        self.minAgrupation = int(grouping["Minimo"])
        #convert before adding 1 so a string-valued "Maximo" works like "Minimo" does
        self.maxAgrupation = int(grouping["Maximo"]) + 1

        #+++ Orthography +++

        #If the orthographic level is activated
        self.Ortografia = analysis["Ortografia"]["Activado"]

        rubric = df["Parametros_Rubrica"]

        #Max number of permitted errors
        self.NMaxErrores = rubric["Ortografia"]["NMaxErrores"]
        #Max number of permitted errors before beginning to subtract
        self.FaltasSalvaguarda = rubric["Ortografia"]["FaltasSalvaguarda"]
        #Level weight (rubrics)
        self.PesoOrtografia = rubric["Ortografia"]["Peso"]

        #+++ Syntax +++
        #if the syntactic level is activated
        self.Sintaxis = analysis["Sintaxis"]["Activado"]
        #max number of sentences and words permitted
        self.NMaxFrases = rubric["Sintaxis"]["NMaxFrases"]
        self.NMaxPalabras = rubric["Sintaxis"]["NMaxPalabras"]
        #***weight of the level
        self.PesoSintaxis = rubric["Sintaxis"]["Peso"]

        #+++ Semantics +++
        #if the semantic level is activated
        self.Semantica = analysis["Semantica"]["Activado"]
        #***weight of the level
        self.PesoSemantics = rubric["Semantica"]["Peso"]

        #--- Similarity ---
        similarity = rubric["Semantica"]["Similitud"]

        #"Package" names which entry of the Spacy section (and of the
        #per-package threshold tables below) is to be used
        SpacyPackage = similarity["Spacy"]["Package"]
        self.spacy_package = similarity["Spacy"][SpacyPackage]
        print("spacy_package", self.spacy_package)
        #the minimum value to select one line of response as similar (0.615 sm - 0.875 md and lg)
        self.LofRespThreshold = rubric["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][SpacyPackage]
        print("lofThreshold", self.LofRespThreshold)

        #the different thresholds (min-max) to adapt the similarity score
        self.UmbralesSimilitud = similarity["UmbralesSimilitud"][SpacyPackage]
        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

        #To configure only once the bert model parameters
        bert = similarity["Bert"]

        model_name = bert["model_path"]
        self.model_path = create_file_path('', doctype=4) + model_name
        print("self.model_path", self.model_path)

        self.modelr = bert["model"]
        print("self.modelr", self.modelr)
        self.epochr = bert["epoch"]
        print("self.epochr", self.epochr)

        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

        #Variables to store some values
        self.studentID = ""
        self.faltasOrto = 0
        self.palabrasPorFrase = 0
        self.minipreguntasMalSpacy = ""
        self.minipreguntasMalBert = ""

    def __loadExamMetadata(self, metadata):
        """
        This method stores the exam format (statement, keywords, sub-questions and
        sub-answers) found in the metadata and resets id_number to 0
        Inputs:
            -metadata: mapping with the keys 'enunciado', 'keywords' and 'minipreguntas';
             each item of 'minipreguntas' holds 'minipregunta' and 'minirespuesta'
        Outputs:
            -list of [minipregunta, minirespuesta] pairs, in order
        """
        self.id_number = 0

        self.minipreguntas = []
        self.minirespuestas = []
        self.indice_minipreguntas = []

        self.enunciado = metadata['enunciado']
        self.prof_keywords = metadata['keywords']

        #a missing or non-iterable 'minipreguntas' is treated as "no sub-questions"
        try:
            entries = list(metadata['minipreguntas'])
        except (KeyError, TypeError):
            entries = []

        for position, entry in enumerate(entries):
            #read both fields before storing anything so the lists stay aligned
            try:
                answer = entry['minirespuesta']
                question = entry['minipregunta']
            except (KeyError, TypeError, IndexError):
                #reading stops at the first malformed entry
                break
            if not isinstance(answer, str):
                #the teacher's full answer is built by joining the sub-answers as text
                break

            self.minirespuestas.append(answer)
            self.minipreguntas.append(question)
            self.indice_minipreguntas.append("minipregunta" + str(position))

        #full teacher answer: the sub-answers separated by single spaces
        self.respuesta_prof = ' '.join(self.minirespuestas)

        return [[question, answer] for question, answer in zip(self.minipreguntas, self.minirespuestas)]

    def __getApiData(self, json_file):
        """
        This method is used to extract the data and format of the exam from the api (sub-question, sub-answers, etc)
        Inputs:
            -json_file: indexable object; element 0 is the students' answers and
             element 1 the exam metadata
        """
        self.answersDF = pd.DataFrame(json_file[0])

        info_profesor = self.__loadExamMetadata(json_file[1])

        #NOTE(review): save_json is not named in this file's explicit imports;
        #presumably it arrives through the star import - confirm
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)

    def __getData(self, json_file):
        """
        This method is used to extract the data and format of the exam from the path that appears in the config json (sub-question, sub-answers, etc)
        Inputs:
            -json_file: path of the json file to load
        """
        self.answersDF = pd.DataFrame(load_json(json_file))

        #the exam metadata is taken from the first entry of the 'metadata' column
        info_profesor = self.__loadExamMetadata(self.answersDF['metadata'][0])

        #NOTE(review): save_json is not named in this file's explicit imports;
        #presumably it arrives through the star import - confirm
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)