Spaces:
Build error
Build error
import pandas as pd | |
import json | |
from codeScripts.Dependencies.SentenceTransformer2 import * | |
from codeScripts.utils import load_json, create_file_path | |
class GetSettings():
    """
    This class stores the selected settings for the current experiment.

    On construction it reads the parameters from the configuration document and then
    loads the exam data, either from the path named in that configuration or from the
    payload handed over by the api.
    """

    def __init__(self, config_settings, studentsData):
        """
        Inputs:
            -config_settings: the loaded config json, indexed by key
            -studentsData: indexable object; element 0 holds the students' responses
             (None to read them from the path in the config json) and element 1 holds
             the exam metadata when the data comes from the api
        """
        #extracting the settings from the configuration document
        self.__getConfigSettings(config_settings)
        #getting the responses to study
        self.__getDatatoStudy(studentsData)

    def __getDatatoStudy(self, data):
        """
        This method selects the source of the responses to study
        Inputs:
            -data: same object received as studentsData in the constructor
        """
        #identity test on purpose: "== None" is evaluated elementwise by
        #DataFrames/arrays and cannot be used as an if condition
        if data[0] is None:
            #extracting the info from the path in the config json
            self.__getData(self.json_file_in)
        else:
            #extracting the info from the selected file in the api
            self.__getApiData(data)

    def setApiSettings(self, api_settings):
        """
        This function is to overwrite the parameters with the selected values from the api
        Inputs:
            -api_settings: dictionary with the stored parameters from the api, either as a
             json string or as an already parsed dictionary
        """
        #transforming string dict into dict (a parsed dict is used as it is)
        if isinstance(api_settings, (str, bytes, bytearray)):
            api_settings = json.loads(api_settings)

        self.PesoOrtografia = api_settings["ortographyPercentage"]
        self.PesoSintaxis = api_settings["syntaxPercentage"]
        self.PesoSemantics = api_settings["semanticPercentage"]
        self.rango_ID = api_settings["students"]

    def __getConfigSettings(self, df):
        """
        This method is used to import the settings from the config json
        Inputs:
            -df: The dataframe where the config json data is loaded
        """
        analysis = df["Parametros_Analisis"]
        rubric = df["Parametros_Rubrica"]

        #+++ General settings +++
        #path where the dataset is stored
        self.json_file_in = df["ruta_fichero_entrada"]
        #path where output is to be stored
        self.json_file_out = df["ruta_fichero_salida"]
        #path to access hunspell components
        self.hunspell_aff = df["ruta_hunspell"]["aff"]
        self.hunspell_dic = df["ruta_hunspell"]["dic"]

        #range of students to study ---- Will be overwritten from api
        if analysis["estudiantes"]["Todos"]:
            self.rango_ID = "All"
        else:
            self.rango_ID = analysis["estudiantes"]["ID_rango"]

        grouping = analysis["Semantica"]["frases"]["Agrupacion"]
        self.minAgrupation = int(grouping["Minimo"])
        #converting before adding so a value stored as text is accepted, like "Minimo";
        #the +1 is kept from the original code
        self.maxAgrupation = int(grouping["Maximo"]) + 1

        #+++ Ortography +++
        #If the ortographic level is activated
        self.Ortografia = analysis["Ortografia"]["Activado"]
        #Max number of permitted errors
        self.NMaxErrores = rubric["Ortografia"]["NMaxErrores"]
        #Max number of permitted errors before beginning to substract
        self.FaltasSalvaguarda = rubric["Ortografia"]["FaltasSalvaguarda"]
        #Level weight (rubrics)
        self.PesoOrtografia = rubric["Ortografia"]["Peso"]

        #+++ Syntax +++
        #if the syntactic level is activated
        self.Sintaxis = analysis["Sintaxis"]["Activado"]
        #max number of sentences and words permitted
        self.NMaxFrases = rubric["Sintaxis"]["NMaxFrases"]
        self.NMaxPalabras = rubric["Sintaxis"]["NMaxPalabras"]
        #***weight of the level
        self.PesoSintaxis = rubric["Sintaxis"]["Peso"]

        #+++ Semantics +++
        #if the semantic level is activated
        self.Semantica = analysis["Semantica"]["Activado"]
        #***weight of the level
        self.PesoSemantics = rubric["Semantica"]["Peso"]

        #--- Similarity ---
        similarity = rubric["Semantica"]["Similitud"]
        #"Package" names the entry used to index the package-specific values below
        SpacyPackage = similarity["Spacy"]["Package"]
        self.spacy_package = similarity["Spacy"][SpacyPackage]
        print("spacy_package", self.spacy_package)

        #the minimun value to select one line of response as similar (0.615 sm - 0.875 md and lg)
        self.LofRespThreshold = rubric["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][SpacyPackage]
        print("lofThreshold", self.LofRespThreshold)

        #the different thresholds (min-max) to adapt the similarity score
        self.UmbralesSimilitud = similarity["UmbralesSimilitud"][SpacyPackage]
        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

        #To configure only once the bert model parameters
        bert = similarity["Bert"]
        model_name = bert["model_path"]
        self.model_path = create_file_path('', doctype=4) + model_name
        print("self.model_path", self.model_path)

        self.modelr = bert["model"]
        print("self.modelr", self.modelr)

        self.epochr = bert["epoch"]
        print("self.epochr", self.epochr)

        #NOTE(review): SentTransf_test is not imported by name in this file; presumably it
        #comes from the star import of SentenceTransformer2 - confirm
        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

        #Variables to store some values
        self.studentID = ""
        self.faltasOrto = 0
        self.palabrasPorFrase = 0
        self.minipreguntasMalSpacy = ""
        self.minipreguntasMalBert = ""

    def __loadProfessorData(self, metadata):
        """
        This method stores the statement, the keywords and the sub-questions/sub-answers
        of the exam found in the metadata
        Inputs:
            -metadata: mapping with the keys 'enunciado', 'keywords' and 'minipreguntas';
             each entry of 'minipreguntas' holds 'minipregunta' and 'minirespuesta'
        """
        self.enunciado = metadata['enunciado']
        self.prof_keywords = metadata['keywords']

        self.minipreguntas = []
        self.minirespuestas = []
        self.indice_minipreguntas = []

        i = 0
        while True:
            try:
                #both fields are read before storing anything so the lists stay aligned
                entry = metadata['minipreguntas'][i]
                minirespuesta = entry['minirespuesta']
                minipregunta = entry['minipregunta']
            except (LookupError, TypeError):
                #no more entries, or an incomplete/malformed one: the extraction stops
                #here and keeps what was read so far
                break
            self.minirespuestas.append(minirespuesta)
            self.minipreguntas.append(minipregunta)
            self.indice_minipreguntas.append("minipregunta" + str(i))
            i += 1

        #the professor's full answer is every sub-answer separated by one space
        self.respuesta_prof = ' '.join(self.minirespuestas)

    def __saveProfessorData(self):
        """
        This method saves the [sub-question, sub-answer] pairs of the professor
        """
        info_profesor = [
            [minipregunta, minirespuesta]
            for minipregunta, minirespuesta in zip(self.minipreguntas, self.minirespuestas)
        ]
        #NOTE(review): save_json is not imported by name in this file; presumably it comes
        #from the star import of SentenceTransformer2 - confirm
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)

    def __getApiData(self, json_file):
        """
        This method is used to extract the data and format of the exam from the api (sub-question, sub-answers, etc)
        Inputs:
            -json_file: indexable object; element 0 holds the students' responses and
             element 1 the exam metadata
        """
        self.answersDF = pd.DataFrame(json_file[0])
        self.id_number = 0

        self.__loadProfessorData(json_file[1])
        self.__saveProfessorData()

    def __getData(self, json_file):
        """
        This method is used to extract the data and format of the exam from the path that appears in the config json (sub-question, sub-answers, etc)
        Inputs:
            -json_file: path of the dataset to load
        """
        self.answersDF = pd.DataFrame(load_json(json_file))
        self.id_number = 0

        #the exam metadata is read from the first entry of the 'metadata' column
        self.__loadProfessorData(self.answersDF['metadata'][0])
        self.__saveProfessorData()