plentas

Build error

File size: 12,099 Bytes

51a4fb0

import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import copy

from codeScripts.Dependencies.SentenceTransformer2 import *
from codeScripts.utils import create_file_path, clean_words, save_json

#Done

class SemanticOutput():
    """
    Class to store the semantic processing and extract results
    """
    def __init__(self, settings):

        self.nota_spacy = dict()
        self.nota_spacy["spacy"] = dict()
        self.nota_spacy["bert"] = dict()

        self.nota_spacy_experimento = dict()
        self.nota_spacy_experimento["spacy"] = dict()
        self.nota_spacy_experimento["bert"] = dict()

        self.identifyLineofResponse = dict()
        self.identifyLineofResponse["spacy"] = dict()
        self.identifyLineofResponse["bert"] = dict()  

        self.identifyLineofResponse_toexcel = []

        self.notas_calculadas = dict()
        self.notas_calculadas["spacy"] = dict()
        self.notas_calculadas["bert"] = dict()  

        self.min_umbral = []
        self.max_umbral = []
        r= settings.UmbralesSimilitud.split(",")
        for i in r:
            c_w= clean_words(i)
            self.min_umbral.append(float(c_w[0]+'.'+c_w[1]))
            self.max_umbral.append(float(c_w[2]+'.'+c_w[3]))
            self.notas_calculadas["spacy"]['Umbral ' + c_w[0]+'.'+c_w[1] + ' - ' + c_w[2]+'.'+c_w[3]] = []
            self.notas_calculadas["bert"]['Umbral ' + c_w[0]+'.'+c_w[1] + ' - ' + c_w[2]+'.'+c_w[3]] = []


        #variables taken from the settings
        self.answersDF_json2 = dict()
        self.answersDF_json2["spacy"] = dict()
        self.answersDF_json2["bert"] = dict()  
        
        self.indiceMinipreguntas = settings.indice_minipreguntas
        print("AAAAAAAAAAAA")
        print(self.indiceMinipreguntas)
        self.LofRespThreshold = settings.LofRespThreshold
   
        self.indx = 1
    
    def __createDict__(self, nota_spacy:dict(), studentID, minipregunta, similarity_type, type = 0):

        if studentID not in nota_spacy[similarity_type].keys():
            nota_spacy[similarity_type][studentID] = dict()

        if type == 0:
            nota_spacy[similarity_type][studentID][minipregunta]= []
        else:
            nota_spacy[similarity_type][studentID][minipregunta]= dict()
        return nota_spacy

    def __plotHistogram__(self, save_file, x):
        """
        Draws a count histogram of the data (with its KDE curve) and stores it as an image.
        Inputs:
            save_file: Name of the image file; the final location is resolved with
                create_file_path(save_file, 3).
            x: The data to be represented.
        """
        histograma = sns.histplot(data=x, stat="count", kde=True, color="black")
        histograma.set(xlabel='Deviation', ylabel='Count')

        lienzo = histograma.get_figure()
        lienzo.savefig(create_file_path(save_file,3))

        # Wipe the axes once the image has been written
        histograma.cla()

    def initInforms(self, studentID, minipregunta, similarity_type):
        """
        This function is for initializing the variables where data is to be stored.
        Inputs:
            studentID: The id of the student
            minipregunta: The minipregunta that is being studied
        """
        #identificar donde está la respuesta por minipreguta
        self.identifyLineofResponse = self.__createDict__(self.identifyLineofResponse, studentID, minipregunta, similarity_type, 1)

        #almacenar notas del evaluation process
        #self.nota_spacy_experimento = self.__createDict__(self.nota_spacy_experimento, studentID, similarity_type, 1) 
        
        self.nota_spacy_experimento[similarity_type][studentID] = dict()
        
        #Almacenar similitudes por minipregunta
        self.nota_spacy = self.__createDict__(self.nota_spacy, studentID, minipregunta, similarity_type)

        #separar y almacenar una a una las lineas de la respuesta
        self.answersDF_json2[similarity_type][studentID] = dict()
        self.answersDF_json2[similarity_type][studentID]["respuesta"] = dict()

    def updateInformsBucle(self, studentID, minipregunta, response, response_label, numberOfSentences, similarity, similarity_type, isMaxSimil):
        """
        This function is the previous needed step before using updateInforms. Stores the important iterative-generated information
        Inputs:
            -studentID: The id of the student
            -minipregunta: The minipregunta that is being studied
            -response: The student's response
            -response_label: The generated label that indicates the sentence number of the extracted response in the text.
            -numberOfSentences: The number of splitted sentences.
            -similarity: The obtained similarity score.
            -isMaxSimil: If the similarity score is the highest obtained at the moment or not.
        """
        #Storing the similarity score obtained for only one sentence
        if numberOfSentences == 1:
            self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)] = dict()
            self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Similitud"] = similarity
            self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Frase"] = response
            self.identifyLineofResponse[similarity_type][studentID][minipregunta][str(self.indx)]["Lineas"] = response_label

            self.answersDF_json2[similarity_type][studentID]["respuesta"][self.indx] = response
            self.indx+=1
        else:
            self.indx = 1

        #storing the maximum similarity for each set of sentences length
        if isMaxSimil:
            self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)] = dict()
            self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["MaxSimilitud"] = similarity
            self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["Frase"] = response
            self.nota_spacy_experimento[similarity_type][studentID][str(numberOfSentences)]["Lineas"] = response_label

        #storing the similarity in every case
        self.nota_spacy[similarity_type][studentID][minipregunta].append([response, None, None] if response == "" else [response, similarity, response_label])

    def updateInforms(self, studentID, umbralL, umbralH, calculatedMark, similarity_type, response = ""):
        """
        This function is to store the obtained results from the processing of one response.
        Inputs:
            -studentID: The id of the student
            -umbralL: The fixed low threshold (config json)
            -umbralH: The fixed high threshold (config json)
            -calculatedMark: The calculated mark.
            -response: The student's response
        """
        print("ZZZZZ")
        print(similarity_type)
        #storing calculated marks
        self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas))

        #storing where the model thought the answer was
        for minipregunta in self.indiceMinipreguntas:
            print("EEEEE")
            print(self.identifyLineofResponse)
            aux = copy.deepcopy(self.identifyLineofResponse)
            for indx in aux[similarity_type][studentID][minipregunta].keys():
                if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075:
                    del self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]
            
            #Getting the number of the guess
            if response == "":
                self.identifyLineofResponse_toexcel.append([minipregunta, ""])
            else:
                max_n = -999999
                indx_queue = 0
                queue = []
                highlightedrows = ""
                highlightedmarks = ""

                for iter in self.identifyLineofResponse[similarity_type][studentID][minipregunta].keys():
                    for indx in self.identifyLineofResponse[similarity_type][studentID][minipregunta].keys():
                        if self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] > max_n and not indx in queue and self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"]>self.LofRespThreshold:
                            max_n = self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"]
                            indx_queue = indx
                    queue.append(indx_queue)
                    highlightedrows = highlightedrows + str(indx_queue) + " "
                    highlightedmarks = highlightedmarks + str(max_n) + " "
                    max_n = -999999
                    indx_queue = 0

                self.identifyLineofResponse_toexcel.append([minipregunta, highlightedrows, highlightedmarks])
                highlightedrows = ""
                highlightedmarks = ""
                queue = []

    def saveSimilarityResults(self, settings, similarity_type):
        """
        Writes the collected data of one similarity engine to disk: four JSON files and an
        Excel sheet with the calculated marks.
        Inputs:
            -settings: system settings (modelr and epochr are read when the type is not "spacy").
            -similarity_type: "spacy" for the Spacy results; any other value selects the data stored
             under that key and prefixes the files with the model name and epoch.
        """
        if similarity_type == "spacy":
            savePrefix = "Spacy - "
        else:
            savePrefix = str(settings.modelr) + str(settings.epochr) + " - "

        # File name suffix -> container holding its content (written in this order).
        # "SimilitudPorConjunto.json" was previously named "AnalisisSemantico.json".
        informes = (
            ("SimilitudPorConjunto.json", self.nota_spacy),
            ("MaxSimilitudPorConjunto.json", self.nota_spacy_experimento),
            ("LineaRespuesta.json", self.identifyLineofResponse),
            ("RespuestaSeparadaPorFrases.json", self.answersDF_json2),
        )
        for nombre, contenedor in informes:
            save_json(create_file_path(savePrefix + nombre,2), contenedor[similarity_type])

        # One column per threshold interval
        tabla_notas = pd.DataFrame()
        for intervaloUmbral, notas in self.notas_calculadas[similarity_type].items():
            tabla_notas[intervaloUmbral] = notas

        tabla_notas.to_excel(create_file_path(savePrefix +'NotasCalculadas.xlsx',2), sheet_name='notas')

        # The histogram of the generated marks (__plotHistogram__) is currently not produced.
    
class SintacticOutput():
    """
    Class to store the syntactic processing
    """
    def __init__(self):
        # Legibility scores plotted as the "FH" and "mu" series; both lists start empty
        # and are plotted against the same x positions, so they must end up equally long.
        self.leg_FH =[]
        self.leg_mu = []

    def saveLegibilityResults(self):
        """
        Saves the collected data in the corresponding format: one JSON file per legibility
        score and a line chart comparing both.
        """
        save_json(create_file_path("FH-Readability.json",2), self.leg_FH, False)
        save_json(create_file_path("mu-Readability.json",2), self.leg_mu, False)

        # Position of each value in the lists, used as the x axis
        x = list(range(len(self.leg_FH)))

        figura = plt.figure(figsize=(15,7))
        try:
            plt.plot(x, self.leg_FH, label = "FH", color = (0.1,0.1,0.1))
            plt.plot(x, self.leg_mu, '--', label = "mu", color = (0.5,0.5,0.5))
            plt.xlabel("Student")
            plt.ylabel("Legibility (0-100)")
            plt.legend(loc=1)
            plt.title("FH vs mu")
            plt.xticks(rotation=-45)
            plt.grid()
            plt.savefig(create_file_path("Img_FHvsMu.png",3))
        finally:
            # pyplot keeps every figure it creates alive until it is closed explicitly;
            # clearing the axes is not enough, so repeated calls would pile up figures.
            plt.close(figura)
 
class OrtographicOutput():
    """
    Container for the results of the orthographic processing
    """
    def __init__(self):
        # The three collections start empty; nothing in this class fills them
        self.notaOrtografia, self.mistakes, self.number_mistakes = [], [], []

    def saveOrtographicResults(self):
        """
        Writes the stored orthographic marks to "NotasOrtografia.json".
        """
        destino = create_file_path("NotasOrtografia.json",2)
        save_json(destino, self.notaOrtografia, False)