import pandas as pd
import json

from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import load_json, save_json, create_file_path


class GetSettings():
    """
    This class stores the selected settings for the current experiment.

    It loads the configuration parameters from the config json and then
    loads the exam data either from the path given in the config or from
    the payload received through the api.
    """
    def __init__(self, config_settings, studentsData):
        # Extracting the settings from the configuration document.
        self.__getConfigSettings(config_settings)
        # Getting the responses to study.
        self.__getDatatoStudy(studentsData)

    def __getDatatoStudy(self, data):
        """
        Dispatch data loading: from the configured file path when no api
        payload was provided, otherwise from the api payload itself.

        Inputs:
            -data: sequence whose first element is None (use config path)
                   or the api-provided exam data.
        """
        if data[0] is None:
            # Extracting the info from the path in the config json.
            self.__getData(self.json_file_in)
        else:
            # Extracting the info from the selected file in the api.
            self.__getApiData(data)

    def setApiSettings(self, api_settings):
        """
        This function is to overwrite the parameters with the selected values
        from the api.

        Inputs:
            -api_settings: dictionary with the stored parameters from the api
        """
        self.PesoOrtografia = api_settings["ortographyPercentage"]
        self.PesoSintaxis = api_settings["syntaxPercentage"]
        self.PesoSemantics = api_settings["semanticPercentage"]
        self.rango_ID = api_settings["students"]

    def __getConfigSettings(self, df):
        """
        This method is used to import the settings from the config json.

        Inputs:
            -df: the dictionary where the config json data is loaded
        """
        # +++ General settings +++
        # Path where the dataset is stored.
        self.json_file_in = df["ruta_fichero_entrada"]
        # Path where output is to be stored.
        self.json_file_out = df["ruta_fichero_salida"]
        # Paths to access hunspell components.
        self.hunspell_aff = df["ruta_hunspell"]["aff"]
        self.hunspell_dic = df["ruta_hunspell"]["dic"]

        # Range of students to study ---- will be overwritten from api.
        if df["Parametros_Analisis"]["estudiantes"]["Todos"]:
            self.rango_ID = "All"
        else:
            self.rango_ID = df["Parametros_Analisis"]["estudiantes"]["ID_rango"]

        # Min/max sentence-grouping sizes (max is exclusive, hence the +1).
        self.minAgrupation = int(df["Parametros_Analisis"]["Semantica"]["frases"]["Agrupacion"]["Minimo"])
        self.maxAgrupation = int(df["Parametros_Analisis"]["Semantica"]["frases"]["Agrupacion"]["Maximo"] + 1)

        # +++ Ortography +++
        # Whether the ortographic level is activated.
        self.Ortografia = df["Parametros_Analisis"]["Ortografia"]["Activado"]
        # Max number of permitted errors.
        self.NMaxErrores = df["Parametros_Rubrica"]["Ortografia"]["NMaxErrores"]
        # Max number of permitted errors before beginning to substract.
        self.FaltasSalvaguarda = df["Parametros_Rubrica"]["Ortografia"]["FaltasSalvaguarda"]
        # Level weight (rubrics).
        self.PesoOrtografia = df["Parametros_Rubrica"]["Ortografia"]["Peso"]

        # +++ Syntax +++
        # Whether the syntactic level is activated.
        self.Sintaxis = df["Parametros_Analisis"]["Sintaxis"]["Activado"]
        # Max number of sentences and words permitted.
        self.NMaxFrases = df["Parametros_Rubrica"]["Sintaxis"]["NMaxFrases"]
        self.NMaxPalabras = df["Parametros_Rubrica"]["Sintaxis"]["NMaxPalabras"]
        # Weight of the level.
        self.PesoSintaxis = df["Parametros_Rubrica"]["Sintaxis"]["Peso"]

        # +++ Semantics +++
        # Whether the semantic level is activated.
        self.Semantica = df["Parametros_Analisis"]["Semantica"]["Activado"]
        # Weight of the level.
        self.PesoSemantics = df["Parametros_Rubrica"]["Semantica"]["Peso"]

        # --- Similarity ---
        SpacyPackage = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Spacy"]["Package"]
        self.spacy_package = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Spacy"][SpacyPackage]
        print("spacy_package", self.spacy_package)
        # The minimum value to select one line of response as similar
        # (0.615 sm - 0.875 md and lg).
        self.LofRespThreshold = df["Parametros_Rubrica"]["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][SpacyPackage]
        print("lofThreshold", self.LofRespThreshold)
        # The different thresholds (min-max) to adapt the similarity score.
        self.UmbralesSimilitud = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["UmbralesSimilitud"][SpacyPackage]
        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

        # To configure only once the bert model parameters.
        model_name = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["model_path"]
        self.model_path = create_file_path('', doctype=4) + model_name
        print("self.model_path", self.model_path)
        self.modelr = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["model"]
        print("self.modelr", self.modelr)
        self.epochr = df["Parametros_Rubrica"]["Semantica"]["Similitud"]["Bert"]["epoch"]
        print("self.epochr", self.epochr)
        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

        # Variables to store some values during the analysis.
        self.studentID = ""
        self.faltasOrto = 0
        self.palabrasPorFrase = 0
        self.minipreguntasMalSpacy = ""
        self.minipreguntasMalBert = ""

    def __extractProfessorInfo(self, metadata):
        """
        Extract the exam structure (sub-questions, sub-answers, statement,
        keywords) from the exam metadata dict, build the full professor
        answer, and persist the [sub-question, sub-answer] pairs to disk.

        Inputs:
            -metadata: dict with keys 'enunciado', 'keywords' and
                       (optionally) 'minipreguntas'.
        """
        self.id_number = 0
        self.minipreguntas = []
        self.minirespuestas = []
        self.indice_minipreguntas = []
        self.respuesta_prof = ""
        self.enunciado = metadata['enunciado']
        self.prof_keywords = metadata['keywords']

        # Best-effort extraction: stop silently at the first malformed or
        # missing entry (mirrors the original while/except behavior).
        try:
            for i, minipregunta in enumerate(metadata['minipreguntas']):
                self.minirespuestas.append(minipregunta['minirespuesta'])
                self.minipreguntas.append(minipregunta['minipregunta'])
                self.indice_minipreguntas.append("minipregunta" + str(i))
        except (KeyError, TypeError, IndexError):
            pass

        # Full professor answer is the space-separated sub-answers.
        self.respuesta_prof = ' '.join(self.minirespuestas)

        info_profesor = [[minipregunta, minirespuesta]
                         for minipregunta, minirespuesta
                         in zip(self.minipreguntas, self.minirespuestas)]
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)

    def __getApiData(self, json_file):
        """
        This method is used to extract the data and format of the exam from
        the api (sub-question, sub-answers, etc).

        Inputs:
            -json_file: sequence whose first element holds the students'
                        answers and whose second element holds the exam
                        metadata dict.
        """
        self.answersDF = pd.DataFrame(json_file[0])
        self.__extractProfessorInfo(json_file[1])

    def __getData(self, json_file):
        """
        This method is used to extract the data and format of the exam from
        the path that appears in the config json (sub-question, sub-answers,
        etc).

        Inputs:
            -json_file: path to the json file with the exam data.
        """
        self.answersDF = pd.DataFrame(load_json(json_file))
        self.__extractProfessorInfo(self.answersDF['metadata'][0])