plentas / app.py
xiomarablanco's picture
ComentarioVerificarSiExisteArchivoArriba
712b2f5
import gradio as gr
import json
from flask import jsonify
from sentence_transformers import SentenceTransformer, InputExample, util
from codeScripts.utils import save_json, load_json, create_file_path
from plentas import Plentas
import pandas as pd
import zipfile
import os
import shutil
from datetime import datetime
import tablib
def Main(uploadedFile, txtFileInput, orthographyPercentage, syntaxPercentage, semanticPercentage, studentsRange):
    """
    This function is the callback wired to the Gradio interface: it evaluates the uploaded answers and exports the marks.
    Inputs:
        -uploadedFile: The uploaded .zip with the students' answers (its .name attribute is used as the path).
        -txtFileInput: The uploaded .txt with the questions and answers (its .name attribute is used as the path).
        -orthographyPercentage: The weight of the orthography mark, converted with float().
        -syntaxPercentage: The weight of the syntax mark, converted with float().
        -semanticPercentage: The weight of the semantic mark, converted with float().
        -studentsRange: The students to evaluate; an empty string is replaced by "All".
    Outputs:
        -[error, excelPath]: error is "" on success or "Oops: " followed by the exception text; excelPath is the
         path returned by exportResultToExcelFile, or "" when something failed.
    """
    error = ""
    excelPath = ""
    copySpanishDictionaries()

    try:
        #building the settings dictionary from the questions/answers file
        qaFilePath = txtFileInput.name
        settings = readQATextFile(qaFilePath)
        #NOTE(review): the key is spelled "ortographyPercentage"; presumably Plentas reads it under that name - confirm before renaming
        settings["ortographyPercentage"] = float(orthographyPercentage)
        settings["syntaxPercentage"] = float(syntaxPercentage)
        settings["semanticPercentage"] = float(semanticPercentage)
        settings["students"] = studentsRange if studentsRange != "" else "All"

        answersZipPath = uploadedFile.name
        baseConfig = load_json("configV2.json")

        #configuring plentas methodology
        studentsAnswers = answersTodict(answersZipPath)
        teacherInfo = createTeacherJson(settings)
        plentas = Plentas(baseConfig[0], [studentsAnswers, teacherInfo])

        #overwriting the custom settings for the settings from the api
        plentas.setApiSettings(settings)

        excelPath = exportResultToExcelFile(plentas.processApiData())
    except Exception as e:
        #every failure is reported through the error label instead of being raised
        error = "Oops: " + str(e)

    return [error, excelPath]
def exportResultToExcelFile(modelResult):
    """
    This function writes the students' results into an .xlsx file and returns its path.
    Inputs:
        -modelResult: The value returned by processApiData. Only modelResult[0] is used; its element at
         position i is indexed with i to obtain the row of that student.
    Outputs:
        -outputFilePath: The path of the generated file inside ./output/.
    """
    outputFolder = './output/'

    excelData = []
    for index, item in enumerate(modelResult[0]):
        print("ITEM - " + str(item))
        excelData.append(item[index])

    tableResults = tablib.Dataset(headers=('ID', 'SimilitudSpacy', 'SimilitudBert', 'NotaSemanticaSpacy', 'NotaSemanticaBert', 'NotaSintaxis', 'NotaOrtografia','NotaTotalSpacy','NotaTotalBert','Feedback'))
    #the rows are loaded into the dataset through its json property
    tableResults.json = json.dumps(excelData)
    tableExport = tableResults.export('xlsx')

    #the folder may not exist yet; without this the open() below raises FileNotFoundError
    os.makedirs(outputFolder, exist_ok=True)
    #full timestamp instead of only the microsecond field, so that two exports cannot overwrite each other
    timestamp = datetime.now().strftime('%Y%m%d%H%M%S%f')
    outputFilePath = outputFolder + timestamp + '_plentas_output.xlsx'

    with open(outputFilePath, 'wb') as f:
        f.write(tableExport)

    return outputFilePath
def copySpanishDictionaries():
    """
    This function copies the Spanish hunspell dictionary files (.aff and .dic) from the assets folder
    into the dictionaries folder of the hunspell package.
    Any error is printed and the function returns normally; after the first failure no further file is copied.
    """
    #(source, destination) pairs, copied in this order
    dictionaryFiles = (
        ("./assets/hunspell_dictionaries/es_ES/es_ES.aff", "/home/user/.local/lib/python3.8/site-packages/hunspell/dictionaries/es_ES.aff"),
        ("./assets/hunspell_dictionaries/es_ES/es_ES.dic", "/home/user/.local/lib/python3.8/site-packages/hunspell/dictionaries/es_ES.dic"),
    )
    try:
        for source, destination in dictionaryFiles:
            shutil.copy(source, destination)
    except Exception as ex:
        print("Error copying dictionaries" + str(ex))
def readQATextFile(qaTextFilePath):
    """
    This function reads the questions/answers text file and builds the configuration dictionary.
    Each question is expected on one line with its answer on the following non-blank line.
    Blank (whitespace-only) lines are ignored wherever they appear.
    Inputs:
        -qaTextFilePath: The path of the .txt file.
    Outputs:
        -configuration: Dictionary with the keys "minip<N>" (question) and "minir<N>" (answer), N starting at 1.
         The lines are stored as read (including their trailing newline). A last question without an answer is discarded.
    """
    #utf-8-sig reads plain utf-8 and also drops the BOM some editors prepend; the with block closes the file
    with open(qaTextFilePath, 'r', encoding='utf-8-sig') as f:
        lines = [line for line in f if line.strip() != ""]

    configuration = {}
    #even positions hold the questions, odd positions the answers; zip drops an unpaired last question
    for qCount, (q, a) in enumerate(zip(lines[0::2], lines[1::2]), start=1):
        configuration["minip" + str(qCount)] = q
        configuration["minir" + str(qCount)] = a

    return configuration
def createTeacherJson(configuration):
    """
    This function extracts the information about the subquestions and subanswers and puts them in the correct format.
    Inputs:
        configuration: Dictionary with the keys "minip<N>" (subquestion) and "minir<N>" (subanswer), N starting at 1.
    Outputs:
        teachersJson: The generated dictionary with the subquestions. "enunciado" and "keywords" are left empty.
    """
    teachersJson = {"enunciado": "", "minipreguntas":[], "keywords":""}

    #5 is the maximum number of permitted subquestions in the configuration2 page
    for number in range(1, 6):
        try:
            subquestion = {
                "minipregunta": configuration["minip" + str(number)],
                "minirespuesta": configuration["minir" + str(number)]
            }
        except KeyError:
            #the subquestions are numbered consecutively, so the first missing key ends the list
            break
        teachersJson["minipreguntas"].append(subquestion)

    return teachersJson
def extractZipData(ruta_zip):
    """
    This function extracts the students's answers from the zip file (the one the teacher has in the task section).
    The extraction is best effort: an error while extracting is printed and not raised.
    An error while opening the zip file itself is raised to the caller.
    Inputs:
        ruta_zip: The path inherited from answersTodict
    """
    #defining the path where the extracted info is to be stored
    ruta_extraccion = create_file_path("StudentAnswers/", doctype= 1)

    #extracting the info; the with block closes the zip file in every case
    with zipfile.ZipFile(ruta_zip, "r") as archivo_zip:
        try:
            archivo_zip.extractall(pwd=None, path=ruta_extraccion)
        except Exception as ex:
            #reporting the failure instead of hiding it, so a partial extraction can be diagnosed
            print("Error extracting " + str(ruta_zip) + ": " + str(ex))
def removeHtmlFromString(string):
    """
    This function removes the html tags from the student's response.
    Everything from a "<" up to the next ">" is dropped; a ">" found outside a tag is dropped too,
    and an unclosed "<" removes the rest of the text.
    Inputs:
        -string: The student's response
    Outputs:
        -new_string: The filtered response
    """
    #round trip through utf-8 so that characters that cannot be encoded (e.g. lone surrogates) become "?"
    string = string.encode('utf-8', 'replace').decode('utf-8', 'replace')

    #collecting the kept characters in a list and joining once avoids the quadratic cost of repeated concatenation
    keptChars = []
    insideTag = False
    for char in string:
        if char == "<":
            insideTag = True
        elif char == ">":
            insideTag = False
        elif not insideTag:
            keptChars.append(char)

    return "".join(keptChars)
def answersTodict(zip_path):
    """
    This function extracts the students's answers and stacks them in one specific format so that it can be processed next.
    Only the first line of each answer file is used (with its html tags removed). A student whose answer
    cannot be read gets an empty response.
    Inputs:
        zip_path: The path where the zip file is stored
    Outputs:
        studentAnswersDict: A list with one dictionary per student folder, with the keys "respuesta",
        "hashed_id" (the folder name up to the first "(") and "TableIndex" (the position inside its work folder).
        The list is also saved as ApiStudentsDict.json.
    """
    #extracting the data
    extractZipData(zip_path)

    studentAnswersDict = []

    #stacking the information of each extracted folder
    for work_folder in os.listdir(create_file_path("StudentAnswers/", doctype= 1)):
        #listing the folder once, so that names and indexes come from the same snapshot
        students = os.listdir(create_file_path("StudentAnswers/" + work_folder, doctype= 1))
        for indx, student in enumerate(students):
            student_name = student.split("(")[0]
            try:
                #where the actual response is; the with block closes the file
                with open(create_file_path("StudentAnswers/" + work_folder + "/" + student + "/" + 'Adjuntos del envio/Respuesta enviada', doctype= 1), encoding='utf-8') as fichero:
                    lineas = fichero.readlines()
                #removing html
                respuesta = removeHtmlFromString(lineas[0])
            except Exception:
                #missing, unreadable or empty answer file: the student is kept with an empty response
                respuesta = ""
            #saving it
            studentAnswersDict.append({"respuesta":respuesta, "hashed_id":student_name, "TableIndex":indx})

    #saving the final dictionary
    save_json(create_file_path('ApiStudentsDict.json', doctype= 1),studentAnswersDict)
    return studentAnswersDict
#input widgets, declared in the same order as the parameters of Main
zipFileInput = gr.inputs.File(
    label="1. Selecciona el .ZIP con las respuestas de los alumnos"
)
txtFileInput = gr.inputs.File(
    label="2. Selecciona el .txt con las preguntas y respuestas correctas. Escriba una pregunta en una sola línea y debajo la respuesta en la línea siguiente."
)
orthographyPercentage = gr.inputs.Textbox(
    label="Ortografía",
    lines=1,
    placeholder="0",
    default=0.1,
    numeric=1
)
syntaxPercentage = gr.inputs.Textbox(
    label="Sintaxis",
    lines=1,
    placeholder="0",
    default=0.1,
    numeric=1
)
semanticPercentage = gr.inputs.Textbox(
    label="Semántica",
    lines=1,
    placeholder="0",
    default=0.8,
    numeric=1
)
studentsRange = gr.inputs.Textbox(
    label="Estudiantes a evaluar",
    lines=1,
    placeholder="Dejar vacío para evaluar todos"
)

#output widgets; labelOutput is declared but not passed to the interface below
labelOutput = gr.outputs.Label(num_top_classes=None, type="auto", label="")
labelError = gr.outputs.Label(num_top_classes=None, type="auto", label="Errores")
downloadExcelButton = gr.outputs.File('Resultados')

#Main returns [error, excelPath], which map to the two outputs in this order
iface = gr.Interface(
    fn=Main,
    inputs=[
        zipFileInput,
        txtFileInput,
        orthographyPercentage,
        syntaxPercentage,
        semanticPercentage,
        studentsRange,
    ],
    outputs=[labelError, downloadExcelButton],
    title="PLENTAS"
)

#starting the app; server_port=7861 can be added to this call to pin the port
iface.launch(share=False, enable_queue=True, show_error=True)