xiomarablanco committed on
Commit
ce84b09
1 Parent(s): 0562415

Procesando respuestas de Moodle

Browse files
.gitignore CHANGED
@@ -2,3 +2,6 @@ api/StudentAnswers/*
2
  codeScripts/__pycache__/*
3
  codeScripts/Dependencies/__pycache__/*
4
  __pycache__/*
 
 
 
 
2
  codeScripts/__pycache__/*
3
  codeScripts/Dependencies/__pycache__/*
4
  __pycache__/*
5
+ output/*
6
+ archivos/*
7
+ flagged/*
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import json
3
  from flask import jsonify
4
  from sentence_transformers import SentenceTransformer, InputExample, util
5
- from codeScripts.utils import save_json, load_json, create_file_path
6
  from plentas import Plentas
7
  import pandas as pd
8
  import zipfile
@@ -10,6 +10,7 @@ import os
10
  import shutil
11
  from datetime import datetime
12
  import tablib
 
13
 
14
  def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange):
15
 
@@ -48,8 +49,12 @@ def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, se
48
  response = Plentas(config_json[0], [answersTodict(uploadedFilePath), createTeacherJson(configuration)])
49
  # # #overwriting the custom settings for the settings from the api
50
  response.setApiSettings(configuration)
51
-
52
- modelResult = response.processApiData()
 
 
 
 
53
 
54
  # modelJson = json.dumps(modelResult)
55
 
@@ -71,8 +76,9 @@ def exportResultToExcelFile(modelResult):
71
  studentData = item[index]
72
  excelData.append(studentData)
73
  index+= 1
 
 
74
 
75
- tableResults = tablib.Dataset(headers=('ID', 'SimilitudSpacy', 'SimilitudBert', 'NotaSemanticaSpacy', 'NotaSemanticaBert', 'NotaSintaxis', 'NotaOrtografia','NotaTotalSpacy','NotaTotalBert','Feedback'))
76
  tableResults.json=json.dumps(excelData)
77
  tableExport=tableResults.export('xlsx')
78
  outputFilePath = './output/' + str(datetime.now().microsecond) + '_plentas_output.xlsx'
@@ -193,30 +199,64 @@ def answersTodict(zip_path):
193
  Outputs:
194
  studentAnswersDict: The dictionary with all the responses
195
  """
 
 
 
 
196
  #extracting the data
197
  extractZipData(zip_path)
198
 
199
  studentAnswersDict = []
 
200
 
201
  #stacking the information of each extracted folder
202
  for work_folder in os.listdir(create_file_path("StudentAnswers/", doctype= 1)):
 
 
203
  for student, indx in zip(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1)), range(len(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1))))):
204
  student_name = student.split("(")
205
  student_name = student_name[0]
 
 
 
206
  try:
207
  #opening the file
 
 
208
 
209
- #fichero = open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'comments.txt', doctype= 1))
210
  #where the actual response is
211
- fichero = open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'Adjuntos del envio/Respuesta enviada', doctype= 1), encoding='utf-8')
212
- #reading it
213
- lineas = fichero.readlines()
 
 
 
214
 
215
- #removing html
216
- lineas[0] = removeHtmlFromString(lineas[0])
217
 
218
- #saving it
219
- studentAnswersDict.append({"respuesta":lineas[0], "hashed_id":student_name, "TableIndex":indx})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  except:
222
  studentAnswersDict.append({"respuesta":"", "hashed_id":student_name, "TableIndex":indx})
@@ -225,16 +265,16 @@ def answersTodict(zip_path):
225
  save_json(create_file_path('ApiStudentsDict.json', doctype= 1),studentAnswersDict)
226
  return studentAnswersDict
227
 
228
-
229
  zipFileInput = gr.inputs.File(label="1. Selecciona el .ZIP con las respuestas de los alumnos")
230
  txtFileInput = gr.inputs.File(label="2. Selecciona el .txt con las preguntas y respuestas correctas. Escriba una pregunta en una sola línea y debajo la respuesta en la línea siguiente.")
231
  orthographyPercentage = gr.inputs.Textbox(label="Ortografía",lines=1, placeholder="0",default=0.1, numeric=1)
232
  syntaxPercentage = gr.inputs.Textbox(label="Sintaxis",lines=1, placeholder="0",default=0.1,numeric=1)
233
  semanticPercentage = gr.inputs.Textbox(label="Semántica",lines=1, placeholder="0",default=0.8, numeric=1)
234
  studentsRange = gr.inputs.Textbox(label="Estudiantes a evaluar",lines=1, placeholder="Dejar vacío para evaluar todos")
235
- #dataFrameOutput = gr.outputs.Dataframe(headers=["Resultados"], max_rows=20, max_cols=None, overflow_row_behaviour="paginate", type="pandas", label="Resultado")
236
 
237
- labelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="")
238
  labelError = gr.outputs.Label(num_top_classes=None, type="auto", label="Errores")
239
  downloadExcelButton = gr.outputs.File('Resultados')
240
 
@@ -242,7 +282,8 @@ iface = gr.Interface(fn=Main
242
  , inputs=[zipFileInput, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange]
243
  , outputs=[labelError, downloadExcelButton]
244
  , title = "PLENTAS"
 
245
  )
246
 
247
- #iface.launch(share = False,enable_queue=True, show_error =True, server_port= 7861)
248
- iface.launch(share = False,enable_queue=True, show_error =True)
 
2
  import json
3
  from flask import jsonify
4
  from sentence_transformers import SentenceTransformer, InputExample, util
5
+ from codeScripts.utils import save_json, load_json, create_file_path, remove
6
  from plentas import Plentas
7
  import pandas as pd
8
  import zipfile
 
10
  import shutil
11
  from datetime import datetime
12
  import tablib
13
+ from pathlib import Path
14
 
15
  def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange):
16
 
 
49
  response = Plentas(config_json[0], [answersTodict(uploadedFilePath), createTeacherJson(configuration)])
50
  # # #overwriting the custom settings for the settings from the api
51
  response.setApiSettings(configuration)
52
+
53
+ try:
54
+ print("Processing!")
55
+ modelResult = response.processApiData()
56
+ except Exception as ex:
57
+ print("Error processing: " + str(ex))
58
 
59
  # modelJson = json.dumps(modelResult)
60
 
 
76
  studentData = item[index]
77
  excelData.append(studentData)
78
  index+= 1
79
+
80
+ tableResults = tablib.Dataset(headers=('ID', 'SimilitudSpacy', 'SimilitudBert', 'NotaSemanticaSpacy', 'NotaSemanticaBert', 'NotaSintaxis', 'NotaOrtografia','NotaTotalSpacy','NotaTotalBert','Feedback'))
81
 
 
82
  tableResults.json=json.dumps(excelData)
83
  tableExport=tableResults.export('xlsx')
84
  outputFilePath = './output/' + str(datetime.now().microsecond) + '_plentas_output.xlsx'
 
199
  Outputs:
200
  studentAnswersDict: The dictionary with all the responses
201
  """
202
+
203
+ # path
204
+ remove('api/StudentAnswers')
205
+
206
  #extracting the data
207
  extractZipData(zip_path)
208
 
209
  studentAnswersDict = []
210
+ indx2=0
211
 
212
  #stacking the information of each extracted folder
213
  for work_folder in os.listdir(create_file_path("StudentAnswers/", doctype= 1)):
214
+ print("work_folder: " + work_folder)
215
+
216
  for student, indx in zip(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1)), range(len(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1))))):
217
  student_name = student.split("(")
218
  student_name = student_name[0]
219
+
220
+ print("student: " + str(student) + " - index: " + str(indx))
221
+
222
  try:
223
  #opening the file
224
+
225
+ fichero1 = create_file_path("StudentAnswers/" + work_folder + "/" + student+ "/" + 'Adjuntos del envio/', doctype= 1)
226
 
 
227
  #where the actual response is
228
+
229
+
230
+ if os.path.exists(fichero1):
231
+ fichero = open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'Adjuntos del envio/Respuesta enviada', doctype= 1), encoding='utf-8')
232
+ #reading it
233
+ lineas = fichero.readlines()
234
 
235
+ #removing html
236
+ lineas[0] = removeHtmlFromString(lineas[0])
237
 
238
+ #saving it
239
+ studentAnswersDict.append({"respuesta":lineas[0], "hashed_id":student_name, "TableIndex":indx})
240
+
241
+ elif os.path.exists(create_file_path("StudentAnswers/" + work_folder, doctype= 1)) :
242
+ student_name2 = work_folder.split("_")
243
+ student_name = student_name2[0]
244
+ student_id2=student_name2[1]
245
+ student_assingsubmission = student_name2[2]
246
+ student_response = student_name2[3]
247
+
248
+ if student_response=='onlinetext':
249
+ fichero = open(create_file_path("StudentAnswers/" + work_folder+"/onlinetext.html", doctype= 1), encoding='utf-8')
250
+
251
+ lineas = fichero.readlines()
252
+
253
+ #removing html
254
+ lineas[0] = removeHtmlFromString(lineas[0])
255
+
256
+ #saving it
257
+ indx2+=1
258
+ studentAnswersDict.append({"respuesta":lineas[0], "hashed_id":student_name, "TableIndex":indx2})
259
+ #break
260
 
261
  except:
262
  studentAnswersDict.append({"respuesta":"", "hashed_id":student_name, "TableIndex":indx})
 
265
  save_json(create_file_path('ApiStudentsDict.json', doctype= 1),studentAnswersDict)
266
  return studentAnswersDict
267
 
268
+
269
  zipFileInput = gr.inputs.File(label="1. Selecciona el .ZIP con las respuestas de los alumnos")
270
  txtFileInput = gr.inputs.File(label="2. Selecciona el .txt con las preguntas y respuestas correctas. Escriba una pregunta en una sola línea y debajo la respuesta en la línea siguiente.")
271
  orthographyPercentage = gr.inputs.Textbox(label="Ortografía",lines=1, placeholder="0",default=0.1, numeric=1)
272
  syntaxPercentage = gr.inputs.Textbox(label="Sintaxis",lines=1, placeholder="0",default=0.1,numeric=1)
273
  semanticPercentage = gr.inputs.Textbox(label="Semántica",lines=1, placeholder="0",default=0.8, numeric=1)
274
  studentsRange = gr.inputs.Textbox(label="Estudiantes a evaluar",lines=1, placeholder="Dejar vacío para evaluar todos")
275
+ #dataFrameOutput = gr.outputs.Dataframe(headers=["Resultados"], max_rows=20, max_cols=None, overflow_row_behaviour="paginate", type="pandas", label="Resultado")
276
 
277
+ labelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="Output")
278
  labelError = gr.outputs.Label(num_top_classes=None, type="auto", label="Errores")
279
  downloadExcelButton = gr.outputs.File('Resultados')
280
 
 
282
  , inputs=[zipFileInput, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange]
283
  , outputs=[labelError, downloadExcelButton]
284
  , title = "PLENTAS"
285
+
286
  )
287
 
288
+ iface.launch(share = False,enable_queue=True, show_error =True, server_port= 7861)
289
+ #iface.launch(share = False,enable_queue=True, show_error =True)
codeScripts/methodologyPlentas.py CHANGED
@@ -95,7 +95,7 @@ class PlentasMethodology():
95
  esIntermedio = 0
96
  for umbralL, umbralH in zip(self.SemanticLevel.output.min_umbral, self.SemanticLevel.output.max_umbral):
97
  for minipregunta, similarity in zip(self.settings.indice_minipreguntas, similarity_array):
98
- print(minipregunta, similarity)
99
  if similarity >= umbralL:
100
  if similarity <= umbralH:
101
  if not esSuperior:
 
95
  esIntermedio = 0
96
  for umbralL, umbralH in zip(self.SemanticLevel.output.min_umbral, self.SemanticLevel.output.max_umbral):
97
  for minipregunta, similarity in zip(self.settings.indice_minipreguntas, similarity_array):
98
+ #print(minipregunta, similarity)
99
  if similarity >= umbralL:
100
  if similarity <= umbralH:
101
  if not esSuperior:
codeScripts/rubrics.py CHANGED
@@ -130,6 +130,9 @@ class Sintaxis2():
130
  else:
131
  #obtaining FH and mu indexes
132
  sentencesLenght, wordsLenght, syll, letter_per_word = check_senteces_words(respuesta_alumno)
 
 
 
133
  FH, _ = FHuertas_index(sentencesLenght, wordsLenght, syll)
134
  mu, _ = mu_index(sentencesLenght, wordsLenght, letter_per_word)
135
 
@@ -142,6 +145,7 @@ class Sintaxis2():
142
  if nota_Sintaxis > self.settings.PesoSintaxis:
143
  nota_Sintaxis = self.settings.PesoSintaxis
144
 
 
145
  self.settings.palabrasPorFrase = round(wordsLenght/sentencesLenght,2)
146
  return nota_Sintaxis
147
 
 
130
  else:
131
  #obtaining FH and mu indexes
132
  sentencesLenght, wordsLenght, syll, letter_per_word = check_senteces_words(respuesta_alumno)
133
+
134
+ print("wordsLenght: " + str(wordsLenght))
135
+
136
  FH, _ = FHuertas_index(sentencesLenght, wordsLenght, syll)
137
  mu, _ = mu_index(sentencesLenght, wordsLenght, letter_per_word)
138
 
 
145
  if nota_Sintaxis > self.settings.PesoSintaxis:
146
  nota_Sintaxis = self.settings.PesoSintaxis
147
 
148
+ print("sentencesLenght: " + str(sentencesLenght))
149
  self.settings.palabrasPorFrase = round(wordsLenght/sentencesLenght,2)
150
  return nota_Sintaxis
151
 
codeScripts/rubricsOut.py CHANGED
@@ -49,8 +49,8 @@ class SemanticOutput():
49
  self.answersDF_json2["bert"] = dict()
50
 
51
  self.indiceMinipreguntas = settings.indice_minipreguntas
52
- print("AAAAAAAAAAAA")
53
- print(self.indiceMinipreguntas)
54
  self.LofRespThreshold = settings.LofRespThreshold
55
 
56
  self.indx = 1
@@ -152,15 +152,17 @@ class SemanticOutput():
152
  -calculatedMark: The calculated mark.
153
  -response: The student's response
154
  """
155
- print("ZZZZZ")
156
- print(similarity_type)
157
  #storing calculated marks
 
 
158
  self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas))
159
 
160
  #storing where the model thought the answer was
161
  for minipregunta in self.indiceMinipreguntas:
162
- print("EEEEE")
163
- print(self.identifyLineofResponse)
164
  aux = copy.deepcopy(self.identifyLineofResponse)
165
  for indx in aux[similarity_type][studentID][minipregunta].keys():
166
  if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075:
 
49
  self.answersDF_json2["bert"] = dict()
50
 
51
  self.indiceMinipreguntas = settings.indice_minipreguntas
52
+ #print("AAAAAAAAAAAA")
53
+ #print(self.indiceMinipreguntas)
54
  self.LofRespThreshold = settings.LofRespThreshold
55
 
56
  self.indx = 1
 
152
  -calculatedMark: The calculated mark.
153
  -response: The student's response
154
  """
155
+ #print("ZZZZZ")
156
+ #print(similarity_type)
157
  #storing calculated marks
158
+ print("indiceMinipreguntas: " + str(self.indiceMinipreguntas))
159
+
160
  self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas))
161
 
162
  #storing where the model thought the answer was
163
  for minipregunta in self.indiceMinipreguntas:
164
+ #print("EEEEE")
165
+ #print(self.identifyLineofResponse)
166
  aux = copy.deepcopy(self.identifyLineofResponse)
167
  for indx in aux[similarity_type][studentID][minipregunta].keys():
168
  if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075:
codeScripts/utils.py CHANGED
@@ -9,6 +9,8 @@ from nltk import ne_chunk
9
  import re
10
  import yake
11
  import spacy
 
 
12
  #dic = hunspell.Hunspell('/Users/miguel.r/Desktop/UNIR/PLenTaS/CORPUS/dict_es_ES/es_ES', '/Users/miguel.r/Desktop/es_ES/es_ES.dic')
13
 
14
  nlp = spacy.load('es_core_news_sm') # Paquete spaCy en español (es)
@@ -129,7 +131,7 @@ def spelling_corrector(student_answer, hunspell_aff = '/Users/javier.sanz/OneDri
129
  return errors,wrong_words
130
 
131
  # Legibilidad de la respuesta en función del índice Fernández-Huerta
132
- def FHuertas_index(sentencesLenght, wordsLenght, syll):
133
  FH = 206.84 - 0.60*(syll*100/wordsLenght) - 1.02*(sentencesLenght*100/wordsLenght)
134
  FH = round(FH, 3)
135
  legibilidad_fh = ""
@@ -162,7 +164,12 @@ def FHuertas_index(sentencesLenght, wordsLenght, syll):
162
  def mu_index(sentencesLenght, wordsLenght, letter_per_word):
163
  med = np.mean(letter_per_word)
164
  var = np.var(letter_per_word)
165
- mu=(wordsLenght/(wordsLenght-1))*(med/var)*100
 
 
 
 
 
166
  mu=round(mu, 3)
167
 
168
  legibilidad_mu = ""
@@ -337,4 +344,13 @@ def create_file_path(file, doctype):
337
  path = "archivos/Images/" + file
338
  else:
339
  path = "codeScripts/Dependencies/BERT-models/Prueba3/" + file
340
- return path
 
 
 
 
 
 
 
 
 
 
9
  import re
10
  import yake
11
  import spacy
12
+ import os
13
+ import shutil
14
  #dic = hunspell.Hunspell('/Users/miguel.r/Desktop/UNIR/PLenTaS/CORPUS/dict_es_ES/es_ES', '/Users/miguel.r/Desktop/es_ES/es_ES.dic')
15
 
16
  nlp = spacy.load('es_core_news_sm') # Paquete spaCy en español (es)
 
131
  return errors,wrong_words
132
 
133
  # Legibilidad de la respuesta en función del índice Fernández-Huerta
134
+ def FHuertas_index(sentencesLenght, wordsLenght, syll):
135
  FH = 206.84 - 0.60*(syll*100/wordsLenght) - 1.02*(sentencesLenght*100/wordsLenght)
136
  FH = round(FH, 3)
137
  legibilidad_fh = ""
 
164
  def mu_index(sentencesLenght, wordsLenght, letter_per_word):
165
  med = np.mean(letter_per_word)
166
  var = np.var(letter_per_word)
167
+ try:
168
+ mu=(wordsLenght/(wordsLenght-1))*(med/var)*100
169
+ except Exception as ex:
170
+ print("Error on mu_index: " + str(ex))
171
+ mu = 0
172
+
173
  mu=round(mu, 3)
174
 
175
  legibilidad_mu = ""
 
344
  path = "archivos/Images/" + file
345
  else:
346
  path = "codeScripts/Dependencies/BERT-models/Prueba3/" + file
347
+ return path
348
+
349
+ def remove(path):
350
+ """ param <path> could either be relative or absolute. """
351
+ if os.path.isfile(path) or os.path.islink(path):
352
+ os.remove(path) # remove the file
353
+ elif os.path.isdir(path):
354
+ shutil.rmtree(path) # remove dir and all contains
355
+ else:
356
+ raise ValueError("file {} is not a file or dir.".format(path))
plentas.py CHANGED
@@ -20,7 +20,7 @@ class Plentas():
20
 
21
  def __jsonToExcel__(self, jsonFile):
22
  outputExcel = dict()
23
- print(jsonFile)
24
  for student in jsonFile:
25
  for numb_id in student.keys():
26
  for column in student[numb_id].keys():
@@ -59,8 +59,13 @@ class Plentas():
59
 
60
  AnalysisOfResponses = []
61
  IDs = getIDrange(self.settings.rango_ID, self.settings.answersDF)
 
 
 
62
  for id in IDs:
63
  studentID = self.settings.answersDF['hashed_id'][id]
 
 
64
  self.settings.studentID = studentID
65
 
66
  nota_rubrica_spacy = 0
@@ -144,7 +149,7 @@ class Plentas():
144
  if self.settings.Ortografia:
145
  self.ortografia.SaveMistakes()
146
 
147
- print(AnalysisOfResponses)
148
  return AnalysisOfResponses
149
 
150
 
 
20
 
21
  def __jsonToExcel__(self, jsonFile):
22
  outputExcel = dict()
23
+ #print(jsonFile)
24
  for student in jsonFile:
25
  for numb_id in student.keys():
26
  for column in student[numb_id].keys():
 
59
 
60
  AnalysisOfResponses = []
61
  IDs = getIDrange(self.settings.rango_ID, self.settings.answersDF)
62
+
63
+ print("Total IDS: " + str(len(IDs)))
64
+
65
  for id in IDs:
66
  studentID = self.settings.answersDF['hashed_id'][id]
67
+
68
+ print("StudentID: " + studentID)
69
  self.settings.studentID = studentID
70
 
71
  nota_rubrica_spacy = 0
 
149
  if self.settings.Ortografia:
150
  self.ortografia.SaveMistakes()
151
 
152
+ #print(AnalysisOfResponses)
153
  return AnalysisOfResponses
154
 
155