# plentas/codeScripts/settings.py
# Hosting-page header (jfarray): duplicate from xiomarablanco/plentas, revision 51a4fb0, 8.68 kB.
import pandas as pd
import json
from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import load_json, create_file_path
class GetSettings:
    """
    This class stores the selected settings and the exam data for the current experiment.

    On construction the configuration values are read first; afterwards the exam
    data (statement, keywords, sub-questions and the teacher's sub-answers) is
    loaded, either from the file named in the configuration or from the data
    handed over by the api.
    """

    def __init__(self, config_settings, studentsData):
        """
        Inputs:
            -config_settings: nested mapping with the content of the config json
            -studentsData: indexable whose first item holds the students' answers
             (None to read them from the path in the config json) and whose
             second item holds the exam metadata
        """
        # extracting the settings from the configuration document
        self.__getConfigSettings(config_settings)
        # getting the responses to study
        self.__getDatatoStudy(studentsData)

    def __getDatatoStudy(self, data):
        """
        Select the source of the exam data.
        Inputs:
            -data: same object as ``studentsData`` in the constructor
        """
        # identity test: `== None` would be evaluated element-wise by array-like
        # objects (e.g. a DataFrame) instead of yielding a plain boolean
        if data[0] is None:
            # extracting the info from the path in the config json
            self.__getData(self.json_file_in)
        else:
            # extracting the info from the selected file in the api
            self.__getApiData(data)

    def setApiSettings(self, api_settings):
        """
        This function is to overwrite the parameters with the selected values from the api
        Inputs:
            -api_settings: the stored parameters from the api, either as a json
             string or as an already decoded dictionary
        """
        # transforming string dict into dict (an already decoded mapping is used as is)
        if isinstance(api_settings, (str, bytes, bytearray)):
            api_settings = json.loads(api_settings)

        self.PesoOrtografia = api_settings["ortographyPercentage"]
        self.PesoSintaxis = api_settings["syntaxPercentage"]
        self.PesoSemantics = api_settings["semanticPercentage"]
        self.rango_ID = api_settings["students"]

    def __getConfigSettings(self, df):
        """
        This method is used to import the settings from the config json
        Inputs:
            -df: the loaded config json; it is accessed as a nested mapping
             (``df["section"]["key"]``)
        """
        # +++ General settings +++

        # path where the dataset is stored
        self.json_file_in = df["ruta_fichero_entrada"]
        # path where output is to be stored
        self.json_file_out = df["ruta_fichero_salida"]

        # path to access hunspell components
        self.hunspell_aff = df["ruta_hunspell"]["aff"]
        self.hunspell_dic = df["ruta_hunspell"]["dic"]

        analisis = df["Parametros_Analisis"]

        # range of students to study ---- Will be overwritten from api
        if analisis["estudiantes"]["Todos"]:
            self.rango_ID = "All"
        else:
            self.rango_ID = analisis["estudiantes"]["ID_rango"]

        agrupacion = analisis["Semantica"]["frases"]["Agrupacion"]
        self.minAgrupation = int(agrupacion["Minimo"])
        # +1 is applied after the conversion so that "Maximo" is parsed exactly
        # like "Minimo" (a numeric string is accepted for both)
        self.maxAgrupation = int(agrupacion["Maximo"]) + 1

        # +++ Orthography +++

        # If the orthographic level is activated
        self.Ortografia = analisis["Ortografia"]["Activado"]

        rubrica = df["Parametros_Rubrica"]

        # Max number of permitted errors
        self.NMaxErrores = rubrica["Ortografia"]["NMaxErrores"]
        # Max number of permitted errors before beginning to subtract
        self.FaltasSalvaguarda = rubrica["Ortografia"]["FaltasSalvaguarda"]
        # Level weight (rubrics)
        self.PesoOrtografia = rubrica["Ortografia"]["Peso"]

        # +++ Syntax +++

        # if the syntactic level is activated
        self.Sintaxis = analisis["Sintaxis"]["Activado"]
        # max number of sentences and words permitted
        self.NMaxFrases = rubrica["Sintaxis"]["NMaxFrases"]
        self.NMaxPalabras = rubrica["Sintaxis"]["NMaxPalabras"]
        # weight of the level
        self.PesoSintaxis = rubrica["Sintaxis"]["Peso"]

        # +++ Semantics +++

        # if the semantic level is activated
        self.Semantica = analisis["Semantica"]["Activado"]
        # weight of the level
        self.PesoSemantics = rubrica["Semantica"]["Peso"]

        # --- Similarity ---
        similitud = rubrica["Semantica"]["Similitud"]

        # "Package" names the entry that is then looked up in the Spacy section,
        # in the line-of-response thresholds and in the similarity thresholds
        spacy_key = similitud["Spacy"]["Package"]
        self.spacy_package = similitud["Spacy"][spacy_key]
        print("spacy_package", self.spacy_package)

        # the minimum value to select one line of response as similar (0.615 sm - 0.875 md and lg)
        self.LofRespThreshold = rubrica["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][spacy_key]
        print("lofThreshold", self.LofRespThreshold)

        # the different thresholds (min-max) to adapt the similarity score
        self.UmbralesSimilitud = similitud["UmbralesSimilitud"][spacy_key]
        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

        # To configure only once the bert model parameters
        bert = similitud["Bert"]
        model_name = bert["model_path"]
        self.model_path = create_file_path('', doctype=4) + model_name
        print("self.model_path", self.model_path)

        self.modelr = bert["model"]
        print("self.modelr", self.modelr)

        self.epochr = bert["epoch"]
        print("self.epochr", self.epochr)

        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

        # Variables to store some values
        self.studentID = ""
        self.faltasOrto = 0
        self.palabrasPorFrase = 0
        self.minipreguntasMalSpacy = ""
        self.minipreguntasMalBert = ""

    def __getApiData(self, json_file):
        """
        This method is used to extract the data and format of the exam from the api (sub-question, sub-answers, etc)
        Inputs:
            -json_file: indexable; item 0 is turned into the answers dataframe,
             item 1 is the exam metadata
        """
        self.answersDF = pd.DataFrame(json_file[0])
        self.__setTeacherInfo(json_file[1])
        self.__saveTeacherInfo()

    def __getData(self, json_file):
        """
        This method is used to extract the data and format of the exam from the path that appears in the config json (sub-question, sub-answers, etc)
        Inputs:
            -json_file: path handed to ``load_json``
        """
        self.answersDF = pd.DataFrame(load_json(json_file))
        # the exam metadata is read from the first row of the "metadata" column
        self.__setTeacherInfo(self.answersDF['metadata'][0])
        self.__saveTeacherInfo()

    def __setTeacherInfo(self, metadata):
        """
        Reset the exam attributes and fill them from the exam metadata.
        Inputs:
            -metadata: mapping with the keys 'enunciado', 'keywords' and
             'minipreguntas'; every item of 'minipreguntas' provides
             'minipregunta' and 'minirespuesta'
        """
        self.id_number = 0
        self.minipreguntas = []
        self.minirespuestas = []
        self.indice_minipreguntas = []
        self.respuesta_prof = ""

        self.enunciado = metadata['enunciado']
        self.prof_keywords = metadata['keywords']

        i = 0
        while True:
            try:
                entry = metadata['minipreguntas'][i]
            except (IndexError, KeyError, TypeError):
                # ran past the last sub-question (or there are none at all)
                break

            # everything is read before anything is stored, so a malformed
            # entry can never leave the three lists with different lengths
            try:
                minirespuesta = entry['minirespuesta']
                minipregunta = entry['minipregunta']
                # the teacher's full answer is the sub-answers joined by single spaces
                separator = '' if i == 0 else ' '
                respuesta_prof = self.respuesta_prof + separator + minirespuesta
            except (KeyError, TypeError) as err:
                # best effort: keep what was read so far, but say so instead of
                # dropping the remaining sub-questions silently
                print("Malformed minipregunta entry", i, "- this entry and the following ones are ignored:", repr(err))
                break

            self.minirespuestas.append(minirespuesta)
            self.minipreguntas.append(minipregunta)
            self.indice_minipreguntas.append("minipregunta" + str(i))
            self.respuesta_prof = respuesta_prof
            i += 1

    def __saveTeacherInfo(self):
        """
        Store the [sub-question, sub-answer] pairs of the teacher in MinirespuestasProfesor.json
        """
        info_profesor = [
            [minipregunta, minirespuesta]
            for minipregunta, minirespuesta in zip(self.minipreguntas, self.minirespuestas)
        ]
        # NOTE(review): save_json is not imported by name in this file; presumably
        # it arrives through the wildcard import at the top - confirm.
        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)