plentas

Build error

App Files Files Community

plentas / codeScripts /settings.py

jfarray

Duplicate from xiomarablanco/plentas

51a4fb0 almost 2 years ago

raw

history blame

8.68 kB

	import pandas as pd
	import json

	from codeScripts.Dependencies.SentenceTransformer2 import *
	from codeScripts.utils import load_json, create_file_path

	class GetSettings():
	    """
	    This class stores the selected settings for the current experiment.

	    On construction it first copies the values of the configuration document
	    into attributes and then loads the exam data to study, either from the
	    path given in the configuration or from the data received from the api.
	    """

	    def __init__(self, config_settings, studentsData):
	        """
	        Inputs:
	            -config_settings: the loaded config json data (indexed like a nested dictionary)
	            -studentsData: indexable object; item 0 holds the students' answers (None to
	             read them from the path in the config json) and item 1 the exam metadata
	        """
	        #extracting the settings from the configuration document
	        self.__getConfigSettings(config_settings)

	        #getting the responses to study
	        self.__getDatatoStudy(studentsData)

	    def __getDatatoStudy(self, data):
	        """
	        This method selects the source of the exam data
	        Inputs:
	            -data: same object received as studentsData in the constructor
	        """
	        #identity check: "== None" would be evaluated element-wise by array-like objects
	        if data[0] is None:
	            #extracting the info from the path in the config json
	            self.__getData(self.json_file_in)
	        else:
	            #extracting the info from the selected file in the api
	            self.__getApiData(data)

	    def setApiSettings(self, api_settings):
	        """
	        This function is to overwrite the parameters with the selected values from the api
	        Inputs:
	            -api_settings: the stored parameters from the api, either as a json string
	             or as an already decoded dictionary
	        """
	        #transforming string dict into dict (an already decoded mapping is used as it is)
	        if isinstance(api_settings, (str, bytes, bytearray)):
	            api_settings = json.loads(api_settings)

	        self.PesoOrtografia = api_settings["ortographyPercentage"]
	        self.PesoSintaxis = api_settings["syntaxPercentage"]
	        self.PesoSemantics = api_settings["semanticPercentage"]
	        self.rango_ID = api_settings["students"]

	    def __getConfigSettings(self, df):
	        """
	        This method is used to import the settings from the config json
	        Inputs:
	            -df: the loaded config json data (indexed like a nested dictionary)
	        """

	        #+++ General settings +++

	        #path where the dataset is stored
	        self.json_file_in = df["ruta_fichero_entrada"]
	        #path where output is to be stored
	        self.json_file_out = df["ruta_fichero_salida"]

	        #path to access hunspell components
	        self.hunspell_aff = df["ruta_hunspell"]["aff"]
	        self.hunspell_dic = df["ruta_hunspell"]["dic"]

	        #the two top-level sections every remaining setting is read from
	        analysis = df["Parametros_Analisis"]
	        rubric = df["Parametros_Rubrica"]

	        #range of students to study ---- Will be overwritten from api
	        if analysis["estudiantes"]["Todos"]:
	            self.rango_ID = "All"
	        else:
	            self.rango_ID = analysis["estudiantes"]["ID_rango"]

	        #sentence grouping limits; each value is converted to int before the +1 so
	        #that a numeric string in the json is accepted for both limits
	        #NOTE(review): the +1 presumably turns the maximum into an exclusive bound - confirm against the callers
	        grouping = analysis["Semantica"]["frases"]["Agrupacion"]
	        self.minAgrupation = int(grouping["Minimo"])
	        self.maxAgrupation = int(grouping["Maximo"]) + 1

	        #+++ Ortography +++

	        #If the ortographic level is activated
	        self.Ortografia = analysis["Ortografia"]["Activado"]
	        #Max number of permitted errors
	        self.NMaxErrores = rubric["Ortografia"]["NMaxErrores"]
	        #Max number of permitted errors before beginning to substract
	        self.FaltasSalvaguarda = rubric["Ortografia"]["FaltasSalvaguarda"]
	        #Level weight (rubrics)
	        self.PesoOrtografia = rubric["Ortografia"]["Peso"]

	        #+++ Syntax +++

	        #if the syntactic level is activated
	        self.Sintaxis = analysis["Sintaxis"]["Activado"]
	        #max number of sentences and words permitted
	        self.NMaxFrases = rubric["Sintaxis"]["NMaxFrases"]
	        self.NMaxPalabras = rubric["Sintaxis"]["NMaxPalabras"]
	        #weight of the level
	        self.PesoSintaxis = rubric["Sintaxis"]["Peso"]

	        #+++ Semantics +++

	        #if the semantic level is activated
	        self.Semantica = analysis["Semantica"]["Activado"]
	        #weight of the level
	        self.PesoSemantics = rubric["Semantica"]["Peso"]

	        #--- Similarity ---
	        similarity = rubric["Semantica"]["Similitud"]

	        #the "Package" entry names which of the sibling entries is the one to use
	        SpacyPackage = similarity["Spacy"]["Package"]
	        self.spacy_package = similarity["Spacy"][SpacyPackage]
	        print("spacy_package", self.spacy_package)
	        #the minimun value to select one line of response as similar (0.615 sm - 0.875 md and lg)
	        self.LofRespThreshold = rubric["Semantica"]["LineaRespuesta"]["ThresholdToConsiderCeroValue"][SpacyPackage]
	        print("lofThreshold", self.LofRespThreshold)

	        #the different thresholds (min-max) to adapt the similarity score
	        self.UmbralesSimilitud = similarity["UmbralesSimilitud"][SpacyPackage]
	        print("self.UmbralesSimilitud", self.UmbralesSimilitud)

	        #To configure only once the bert model parameters
	        bert = similarity["Bert"]

	        model_name = bert["model_path"]
	        self.model_path = create_file_path('', doctype=4) + model_name
	        print("self.model_path", self.model_path)

	        self.modelr = bert["model"]
	        print("self.modelr", self.modelr)
	        self.epochr = bert["epoch"]
	        print("self.epochr", self.epochr)

	        self.BertModels_glbl = SentTransf_test([self.modelr], [self.epochr])

	        #Variables to store some values
	        self.studentID = ""
	        self.faltasOrto = 0
	        self.palabrasPorFrase = 0
	        self.minipreguntasMalSpacy = ""
	        self.minipreguntasMalBert = ""

	    def __loadExamMetadata(self, metadata):
	        """
	        This method fills the exam attributes (statement, keywords, sub-questions and
	        sub-answers) shared by both data sources
	        Inputs:
	            -metadata: mapping with the keys 'enunciado', 'keywords' and 'minipreguntas'
	        Outputs:
	            -list with one [minipregunta, minirespuesta] pair per sub-question read
	        """
	        self.id_number = 0

	        self.minipreguntas = []
	        self.minirespuestas = []
	        self.indice_minipreguntas = []
	        self.respuesta_prof = ""

	        self.enunciado = metadata['enunciado']
	        self.prof_keywords = metadata['keywords']

	        #a missing or unusable sub-question list leaves the exam without sub-questions
	        try:
	            sub_questions = list(metadata['minipreguntas'])
	        except (LookupError, TypeError):
	            sub_questions = []

	        for position, entry in enumerate(sub_questions):
	            #both fields are read before anything is stored so the lists always stay aligned
	            try:
	                answer = entry['minirespuesta']
	                question = entry['minipregunta']
	            except (LookupError, TypeError):
	                #reading stops at the first malformed entry
	                break
	            if not isinstance(answer, str):
	                #the sub-answers are concatenated below, so they have to be text
	                break

	            self.minirespuestas.append(answer)
	            self.minipreguntas.append(question)
	            self.indice_minipreguntas.append("minipregunta" + str(position))

	        #the whole teacher's answer is the sub-answers separated by single spaces
	        self.respuesta_prof = ' '.join(self.minirespuestas)

	        return [[question, answer] for question, answer in zip(self.minipreguntas, self.minirespuestas)]

	    def __getApiData(self, json_file):
	        """
	        This method is used to extract the data and format of the exam from the api (sub-question, sub-answers, etc)
	        Inputs:
	            -json_file: indexable object; item 0 holds the students' answers and item 1 the exam metadata
	        """
	        self.answersDF = pd.DataFrame(json_file[0])

	        info_profesor = self.__loadExamMetadata(json_file[1])

	        #NOTE(review): save_json is not imported by name in this file; it is presumably
	        #provided by the wildcard import - confirm
	        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)

	    def __getData(self, json_file):
	        """
	        This method is used to extract the data and format of the exam from the path that appears in the config json (sub-question, sub-answers, etc)
	        Inputs:
	            -json_file: path handed to load_json to read the dataset
	        """
	        self.answersDF = pd.DataFrame(load_json(json_file))

	        #the exam metadata is taken from the 'metadata' cell at index 0
	        info_profesor = self.__loadExamMetadata(self.answersDF['metadata'][0])

	        #NOTE(review): save_json is not imported by name in this file; it is presumably
	        #provided by the wildcard import - confirm
	        save_json(create_file_path("MinirespuestasProfesor.json", 2), info_profesor)