xiomarablanco committed on
Commit
ce84b09
1 Parent(s): 0562415

Procesando respuestas de Moodle

Browse files
.gitignore CHANGED
@@ -2,3 +2,6 @@ api/StudentAnswers/*
2
  codeScripts/__pycache__/*
3
  codeScripts/Dependencies/__pycache__/*
4
  __pycache__/*
 
 
 
 
2
  codeScripts/__pycache__/*
3
  codeScripts/Dependencies/__pycache__/*
4
  __pycache__/*
5
+ output/*
6
+ archivos/*
7
+ flagged/*
app.py CHANGED
@@ -2,7 +2,7 @@ import gradio as gr
2
  import json
3
  from flask import jsonify
4
  from sentence_transformers import SentenceTransformer, InputExample, util
5
- from codeScripts.utils import save_json, load_json, create_file_path
6
  from plentas import Plentas
7
  import pandas as pd
8
  import zipfile
@@ -10,6 +10,7 @@ import os
10
  import shutil
11
  from datetime import datetime
12
  import tablib
 
13
 
14
  def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange):
15
 
@@ -48,8 +49,12 @@ def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, se
48
  response = Plentas(config_json[0], [answersTodict(uploadedFilePath), createTeacherJson(configuration)])
49
  # # #overwriting the custom settings for the settings from the api
50
  response.setApiSettings(configuration)
51
-
52
- modelResult = response.processApiData()
 
 
 
 
53
 
54
  # modelJson = json.dumps(modelResult)
55
 
@@ -71,8 +76,9 @@ def exportResultToExcelFile(modelResult):
71
  studentData = item[index]
72
  excelData.append(studentData)
73
  index+= 1
 
 
74
 
75
- tableResults = tablib.Dataset(headers=('ID', 'SimilitudSpacy', 'SimilitudBert', 'NotaSemanticaSpacy', 'NotaSemanticaBert', 'NotaSintaxis', 'NotaOrtografia','NotaTotalSpacy','NotaTotalBert','Feedback'))
76
  tableResults.json=json.dumps(excelData)
77
  tableExport=tableResults.export('xlsx')
78
  outputFilePath = './output/' + str(datetime.now().microsecond) + '_plentas_output.xlsx'
@@ -193,30 +199,64 @@ def answersTodict(zip_path):
193
  Outputs:
194
  studentAnswersDict: The dictionary with all the responses
195
  """
 
 
 
 
196
  #extracting the data
197
  extractZipData(zip_path)
198
 
199
  studentAnswersDict = []
 
200
 
201
  #stacking the information of each extracted folder
202
  for work_folder in os.listdir(create_file_path("StudentAnswers/", doctype= 1)):
 
 
203
  for student, indx in zip(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1)), range(len(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1))))):
204
  student_name = student.split("(")
205
  student_name = student_name[0]
 
 
 
206
  try:
207
  #opening the file
 
 
208
 
209
- #fichero = open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'comments.txt', doctype= 1))
210
  #where the actual response is
211
- fichero = open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'Adjuntos del envio/Respuesta enviada', doctype= 1), encoding='utf-8')
212
- #reading it
213
- lineas = fichero.readlines()
 
 
 
214
 
215
- #removing html
216
- lineas[0] = removeHtmlFromString(lineas[0])
217
 
218
- #saving it
219
- studentAnswersDict.append({"respuesta":lineas[0], "hashed_id":student_name, "TableIndex":indx})
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
  except:
222
  studentAnswersDict.append({"respuesta":"", "hashed_id":student_name, "TableIndex":indx})
@@ -225,16 +265,16 @@ def answersTodict(zip_path):
225
  save_json(create_file_path('ApiStudentsDict.json', doctype= 1),studentAnswersDict)
226
  return studentAnswersDict
227
 
228
-
229
  zipFileInput = gr.inputs.File(label="1. Selecciona el .ZIP con las respuestas de los alumnos")
230
  txtFileInput = gr.inputs.File(label="2. Selecciona el .txt con las preguntas y respuestas correctas. Escriba una pregunta en una sola línea y debajo la respuesta en la línea siguiente.")
231
  orthographyPercentage = gr.inputs.Textbox(label="Ortografía",lines=1, placeholder="0",default=0.1, numeric=1)
232
  syntaxPercentage = gr.inputs.Textbox(label="Sintaxis",lines=1, placeholder="0",default=0.1,numeric=1)
233
  semanticPercentage = gr.inputs.Textbox(label="Semántica",lines=1, placeholder="0",default=0.8, numeric=1)
234
  studentsRange = gr.inputs.Textbox(label="Estudiantes a evaluar",lines=1, placeholder="Dejar vacío para evaluar todos")
235
- #dataFrameOutput = gr.outputs.Dataframe(headers=["Resultados"], max_rows=20, max_cols=None, overflow_row_behaviour="paginate", type="pandas", label="Resultado")
236
 
237
- labelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="")
238
  labelError = gr.outputs.Label(num_top_classes=None, type="auto", label="Errores")
239
  downloadExcelButton = gr.outputs.File('Resultados')
240
 
@@ -242,7 +282,8 @@ iface = gr.Interface(fn=Main
242
  , inputs=[zipFileInput, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange]
243
  , outputs=[labelError, downloadExcelButton]
244
  , title = "PLENTAS"
 
245
  )
246
 
247
- #iface.launch(share = False,enable_queue=True, show_error =True, server_port= 7861)
248
- iface.launch(share = False,enable_queue=True, show_error =True)
 
2
  import json
3
  from flask import jsonify
4
  from sentence_transformers import SentenceTransformer, InputExample, util
5
+ from codeScripts.utils import save_json, load_json, create_file_path, remove
6
  from plentas import Plentas
7
  import pandas as pd
8
  import zipfile
 
10
  import shutil
11
  from datetime import datetime
12
  import tablib
13
+ from pathlib import Path
14
 
15
  def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange):
16
 
 
49
  response = Plentas(config_json[0], [answersTodict(uploadedFilePath), createTeacherJson(configuration)])
50
  # # #overwriting the custom settings for the settings from the api
51
  response.setApiSettings(configuration)
52
+
53
+ try:
54
+ print("Processing!")
55
+ modelResult = response.processApiData()
56
+ except Exception as ex:
57
+ print("Error processing: " + str(ex))
58
 
59
  # modelJson = json.dumps(modelResult)
60
 
 
76
  studentData = item[index]
77
  excelData.append(studentData)
78
  index+= 1
79
+
80
+ tableResults = tablib.Dataset(headers=('ID', 'SimilitudSpacy', 'SimilitudBert', 'NotaSemanticaSpacy', 'NotaSemanticaBert', 'NotaSintaxis', 'NotaOrtografia','NotaTotalSpacy','NotaTotalBert','Feedback'))
81
 
 
82
  tableResults.json=json.dumps(excelData)
83
  tableExport=tableResults.export('xlsx')
84
  outputFilePath = './output/' + str(datetime.now().microsecond) + '_plentas_output.xlsx'
 
199
  Outputs:
200
  studentAnswersDict: The dictionary with all the responses
201
  """
202
+
203
+ # path
204
+ remove('api/StudentAnswers')
205
+
206
  #extracting the data
207
  extractZipData(zip_path)
208
 
209
  studentAnswersDict = []
210
+ indx2=0
211
 
212
  #stacking the information of each extracted folder
213
  for work_folder in os.listdir(create_file_path("StudentAnswers/", doctype= 1)):
214
+ print("work_folder: " + work_folder)
215
+
216
  for student, indx in zip(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1)), range(len(os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1))))):
217
  student_name = student.split("(")
218
  student_name = student_name[0]
219
+
220
+ print("student: " + str(student) + " - index: " + str(indx))
221
+
222
  try:
223
  #opening the file
224
+
225
+ fichero1 = create_file_path("StudentAnswers/" + work_folder + "/" + student+ "/" + 'Adjuntos del envio/', doctype= 1)
226
 
 
227
  #where the actual response is
228
+
229
+
230
+ if os.path.exists(fichero1):
231
+ fichero = open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'Adjuntos del envio/Respuesta enviada', doctype= 1), encoding='utf-8')
232
+ #reading it
233
+ lineas = fichero.readlines()
234
 
235
+ #removing html
236
+ lineas[0] = removeHtmlFromString(lineas[0])
237
 
238
+ #saving it
239
+ studentAnswersDict.append({"respuesta":lineas[0], "hashed_id":student_name, "TableIndex":indx})
240
+
241
+ elif os.path.exists(create_file_path("StudentAnswers/" + work_folder, doctype= 1)) :
242
+ student_name2 = work_folder.split("_")
243
+ student_name = student_name2[0]
244
+ student_id2=student_name2[1]
245
+ student_assingsubmission = student_name2[2]
246
+ student_response = student_name2[3]
247
+
248
+ if student_response=='onlinetext':
249
+ fichero = open(create_file_path("StudentAnswers/" + work_folder+"/onlinetext.html", doctype= 1), encoding='utf-8')
250
+
251
+ lineas = fichero.readlines()
252
+
253
+ #removing html
254
+ lineas[0] = removeHtmlFromString(lineas[0])
255
+
256
+ #saving it
257
+ indx2+=1
258
+ studentAnswersDict.append({"respuesta":lineas[0], "hashed_id":student_name, "TableIndex":indx2})
259
+ #break
260
 
261
  except:
262
  studentAnswersDict.append({"respuesta":"", "hashed_id":student_name, "TableIndex":indx})
 
265
  save_json(create_file_path('ApiStudentsDict.json', doctype= 1),studentAnswersDict)
266
  return studentAnswersDict
267
 
268
+
269
  zipFileInput = gr.inputs.File(label="1. Selecciona el .ZIP con las respuestas de los alumnos")
270
  txtFileInput = gr.inputs.File(label="2. Selecciona el .txt con las preguntas y respuestas correctas. Escriba una pregunta en una sola línea y debajo la respuesta en la línea siguiente.")
271
  orthographyPercentage = gr.inputs.Textbox(label="Ortografía",lines=1, placeholder="0",default=0.1, numeric=1)
272
  syntaxPercentage = gr.inputs.Textbox(label="Sintaxis",lines=1, placeholder="0",default=0.1,numeric=1)
273
  semanticPercentage = gr.inputs.Textbox(label="Semántica",lines=1, placeholder="0",default=0.8, numeric=1)
274
  studentsRange = gr.inputs.Textbox(label="Estudiantes a evaluar",lines=1, placeholder="Dejar vacío para evaluar todos")
275
+ #dataFrameOutput = gr.outputs.Dataframe(headers=["Resultados"], max_rows=20, max_cols=None, overflow_row_behaviour="paginate", type="pandas", label="Resultado")
276
 
277
+ labelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="Output")
278
  labelError = gr.outputs.Label(num_top_classes=None, type="auto", label="Errores")
279
  downloadExcelButton = gr.outputs.File('Resultados')
280
 
 
282
  , inputs=[zipFileInput, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange]
283
  , outputs=[labelError, downloadExcelButton]
284
  , title = "PLENTAS"
285
+
286
  )
287
 
288
+ iface.launch(share = False,enable_queue=True, show_error =True, server_port= 7861)
289
+ #iface.launch(share = False,enable_queue=True, show_error =True)
codeScripts/methodologyPlentas.py CHANGED
@@ -95,7 +95,7 @@ class PlentasMethodology():
95
  esIntermedio = 0
96
  for umbralL, umbralH in zip(self.SemanticLevel.output.min_umbral, self.SemanticLevel.output.max_umbral):
97
  for minipregunta, similarity in zip(self.settings.indice_minipreguntas, similarity_array):
98
- print(minipregunta, similarity)
99
  if similarity >= umbralL:
100
  if similarity <= umbralH:
101
  if not esSuperior:
 
95
  esIntermedio = 0
96
  for umbralL, umbralH in zip(self.SemanticLevel.output.min_umbral, self.SemanticLevel.output.max_umbral):
97
  for minipregunta, similarity in zip(self.settings.indice_minipreguntas, similarity_array):
98
+ #print(minipregunta, similarity)
99
  if similarity >= umbralL:
100
  if similarity <= umbralH:
101
  if not esSuperior:
codeScripts/rubrics.py CHANGED
@@ -130,6 +130,9 @@ class Sintaxis2():
130
  else:
131
  #obtaining FH and mu indexes
132
  sentencesLenght, wordsLenght, syll, letter_per_word = check_senteces_words(respuesta_alumno)
 
 
 
133
  FH, _ = FHuertas_index(sentencesLenght, wordsLenght, syll)
134
  mu, _ = mu_index(sentencesLenght, wordsLenght, letter_per_word)
135
 
@@ -142,6 +145,7 @@ class Sintaxis2():
142
  if nota_Sintaxis > self.settings.PesoSintaxis:
143
  nota_Sintaxis = self.settings.PesoSintaxis
144
 
 
145
  self.settings.palabrasPorFrase = round(wordsLenght/sentencesLenght,2)
146
  return nota_Sintaxis
147
 
 
130
  else:
131
  #obtaining FH and mu indexes
132
  sentencesLenght, wordsLenght, syll, letter_per_word = check_senteces_words(respuesta_alumno)
133
+
134
+ print("wordsLenght: " + str(wordsLenght))
135
+
136
  FH, _ = FHuertas_index(sentencesLenght, wordsLenght, syll)
137
  mu, _ = mu_index(sentencesLenght, wordsLenght, letter_per_word)
138
 
 
145
  if nota_Sintaxis > self.settings.PesoSintaxis:
146
  nota_Sintaxis = self.settings.PesoSintaxis
147
 
148
+ print("sentencesLenght: " + str(sentencesLenght))
149
  self.settings.palabrasPorFrase = round(wordsLenght/sentencesLenght,2)
150
  return nota_Sintaxis
151
 
codeScripts/rubricsOut.py CHANGED
@@ -49,8 +49,8 @@ class SemanticOutput():
49
  self.answersDF_json2["bert"] = dict()
50
 
51
  self.indiceMinipreguntas = settings.indice_minipreguntas
52
- print("AAAAAAAAAAAA")
53
- print(self.indiceMinipreguntas)
54
  self.LofRespThreshold = settings.LofRespThreshold
55
 
56
  self.indx = 1
@@ -152,15 +152,17 @@ class SemanticOutput():
152
  -calculatedMark: The calculated mark.
153
  -response: The student's response
154
  """
155
- print("ZZZZZ")
156
- print(similarity_type)
157
  #storing calculated marks
 
 
158
  self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas))
159
 
160
  #storing where the model thought the answer was
161
  for minipregunta in self.indiceMinipreguntas:
162
- print("EEEEE")
163
- print(self.identifyLineofResponse)
164
  aux = copy.deepcopy(self.identifyLineofResponse)
165
  for indx in aux[similarity_type][studentID][minipregunta].keys():
166
  if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075:
 
49
  self.answersDF_json2["bert"] = dict()
50
 
51
  self.indiceMinipreguntas = settings.indice_minipreguntas
52
+ #print("AAAAAAAAAAAA")
53
+ #print(self.indiceMinipreguntas)
54
  self.LofRespThreshold = settings.LofRespThreshold
55
 
56
  self.indx = 1
 
152
  -calculatedMark: The calculated mark.
153
  -response: The student's response
154
  """
155
+ #print("ZZZZZ")
156
+ #print(similarity_type)
157
  #storing calculated marks
158
+ print("indiceMinipreguntas: " + str(self.indiceMinipreguntas))
159
+
160
  self.notas_calculadas[similarity_type]['Umbral ' + str(umbralL) + ' - ' + str(umbralH)].append(0 if response == "" else calculatedMark/len(self.indiceMinipreguntas))
161
 
162
  #storing where the model thought the answer was
163
  for minipregunta in self.indiceMinipreguntas:
164
+ #print("EEEEE")
165
+ #print(self.identifyLineofResponse)
166
  aux = copy.deepcopy(self.identifyLineofResponse)
167
  for indx in aux[similarity_type][studentID][minipregunta].keys():
168
  if abs(self.identifyLineofResponse[similarity_type][studentID][minipregunta][indx]["Similitud"] - self.nota_spacy_experimento[similarity_type][studentID]["1"]["MaxSimilitud"]) > 0.075:
codeScripts/utils.py CHANGED
@@ -9,6 +9,8 @@ from nltk import ne_chunk
9
  import re
10
  import yake
11
  import spacy
 
 
12
  #dic = hunspell.Hunspell('/Users/miguel.r/Desktop/UNIR/PLenTaS/CORPUS/dict_es_ES/es_ES', '/Users/miguel.r/Desktop/es_ES/es_ES.dic')
13
 
14
  nlp = spacy.load('es_core_news_sm') # Paquete spaCy en español (es)
@@ -129,7 +131,7 @@ def spelling_corrector(student_answer, hunspell_aff = '/Users/javier.sanz/OneDri
129
  return errors,wrong_words
130
 
131
  # Legibilidad de la respuesta en función del índice Fernández-Huerta
132
- def FHuertas_index(sentencesLenght, wordsLenght, syll):
133
  FH = 206.84 - 0.60*(syll*100/wordsLenght) - 1.02*(sentencesLenght*100/wordsLenght)
134
  FH = round(FH, 3)
135
  legibilidad_fh = ""
@@ -162,7 +164,12 @@ def FHuertas_index(sentencesLenght, wordsLenght, syll):
162
  def mu_index(sentencesLenght, wordsLenght, letter_per_word):
163
  med = np.mean(letter_per_word)
164
  var = np.var(letter_per_word)
165
- mu=(wordsLenght/(wordsLenght-1))*(med/var)*100
 
 
 
 
 
166
  mu=round(mu, 3)
167
 
168
  legibilidad_mu = ""
@@ -337,4 +344,13 @@ def create_file_path(file, doctype):
337
  path = "archivos/Images/" + file
338
  else:
339
  path = "codeScripts/Dependencies/BERT-models/Prueba3/" + file
340
- return path
 
 
 
 
 
 
 
 
 
 
9
  import re
10
  import yake
11
  import spacy
12
+ import os
13
+ import shutil
14
  #dic = hunspell.Hunspell('/Users/miguel.r/Desktop/UNIR/PLenTaS/CORPUS/dict_es_ES/es_ES', '/Users/miguel.r/Desktop/es_ES/es_ES.dic')
15
 
16
  nlp = spacy.load('es_core_news_sm') # Paquete spaCy en español (es)
 
131
  return errors,wrong_words
132
 
133
  # Legibilidad de la respuesta en función del índice Fernández-Huerta
134
+ def FHuertas_index(sentencesLenght, wordsLenght, syll):
135
  FH = 206.84 - 0.60*(syll*100/wordsLenght) - 1.02*(sentencesLenght*100/wordsLenght)
136
  FH = round(FH, 3)
137
  legibilidad_fh = ""
 
164
  def mu_index(sentencesLenght, wordsLenght, letter_per_word):
165
  med = np.mean(letter_per_word)
166
  var = np.var(letter_per_word)
167
+ try:
168
+ mu=(wordsLenght/(wordsLenght-1))*(med/var)*100
169
+ except Exception as ex:
170
+ print("Error on mu_index: " + str(ex))
171
+ mu = 0
172
+
173
  mu=round(mu, 3)
174
 
175
  legibilidad_mu = ""
 
344
  path = "archivos/Images/" + file
345
  else:
346
  path = "codeScripts/Dependencies/BERT-models/Prueba3/" + file
347
+ return path
348
+
349
+ def remove(path):
350
+ """ param <path> could either be relative or absolute. """
351
+ if os.path.isfile(path) or os.path.islink(path):
352
+ os.remove(path) # remove the file
353
+ elif os.path.isdir(path):
354
+ shutil.rmtree(path) # remove dir and all contains
355
+ else:
356
+ raise ValueError("file {} is not a file or dir.".format(path))
plentas.py CHANGED
@@ -20,7 +20,7 @@ class Plentas():
20
 
21
  def __jsonToExcel__(self, jsonFile):
22
  outputExcel = dict()
23
- print(jsonFile)
24
  for student in jsonFile:
25
  for numb_id in student.keys():
26
  for column in student[numb_id].keys():
@@ -59,8 +59,13 @@ class Plentas():
59
 
60
  AnalysisOfResponses = []
61
  IDs = getIDrange(self.settings.rango_ID, self.settings.answersDF)
 
 
 
62
  for id in IDs:
63
  studentID = self.settings.answersDF['hashed_id'][id]
 
 
64
  self.settings.studentID = studentID
65
 
66
  nota_rubrica_spacy = 0
@@ -144,7 +149,7 @@ class Plentas():
144
  if self.settings.Ortografia:
145
  self.ortografia.SaveMistakes()
146
 
147
- print(AnalysisOfResponses)
148
  return AnalysisOfResponses
149
 
150
 
 
20
 
21
  def __jsonToExcel__(self, jsonFile):
22
  outputExcel = dict()
23
+ #print(jsonFile)
24
  for student in jsonFile:
25
  for numb_id in student.keys():
26
  for column in student[numb_id].keys():
 
59
 
60
  AnalysisOfResponses = []
61
  IDs = getIDrange(self.settings.rango_ID, self.settings.answersDF)
62
+
63
+ print("Total IDS: " + str(len(IDs)))
64
+
65
  for id in IDs:
66
  studentID = self.settings.answersDF['hashed_id'][id]
67
+
68
+ print("StudentID: " + studentID)
69
  self.settings.studentID = studentID
70
 
71
  nota_rubrica_spacy = 0
 
149
  if self.settings.Ortografia:
150
  self.ortografia.SaveMistakes()
151
 
152
+ #print(AnalysisOfResponses)
153
  return AnalysisOfResponses
154
 
155