from datetime import date
import itertools

from docx import Document
import gradio as gr

# Separator placed between the id, name and credits of one course in the
# report text.
_FIELD_SEP = " "


def _read_course_rows(table):
    """Return the course pairs found in the rows of *table*.

    Only rows whose first cell starts with a digit are considered. For each
    such row, cells 1-3 are read as one course (id, name, credits) and cells
    4-6 as the other. Rows in which all six of those cells are empty are
    skipped.

    Returns:
        A list of ``(student_course, college_course)`` tuples, each course
        being a list of three cell texts; ``student_course`` comes from
        cells 4-6 and ``college_course`` from cells 1-3.

    Raises:
        IndexError: if a considered row has fewer than seven cells.
    """
    pairs = []
    for row in table.rows:
        # Read the cells once per row rather than once per column.
        cells = row.cells
        # Slicing instead of indexing keeps a blank first cell from raising
        # IndexError; such a row is simply not a course row.
        if not cells[0].text[:1].isdigit():
            continue
        texts = [cells[position].text for position in range(1, 7)]
        if not any(texts):
            # Numbered but completely blank row: nothing to report.
            continue
        college_course, student_course = texts[:3], texts[3:]
        pairs.append((student_course, college_course))
    return pairs


def _format_courses(pairs):
    """Render course pairs as the text that follows the report heading.

    A pair with any empty cell yields the "Empty cell detected" message in
    that row's position, so the remaining rows stay correctly paired.
    """
    entries = []
    matched = 0
    for student_course, college_course in pairs:
        if not (all(student_course) and all(college_course)):
            entries.append("Problem!\nEmpty cell detected\n")
            continue
        matched += 1
        entries.append(
            _FIELD_SEP.join(student_course)
            + " is equivalent "
            + _FIELD_SEP.join(college_course)
            + "\n"
        )
    # The established output starts with one newline per complete pair;
    # kept so the text for well-formed documents is unchanged.
    return "\n" * matched + "".join(entries)


def docparser(file_obj):
    """Summarise a course-equivalence Word document as plain text.

    The document is opened with python-docx and read by fixed table
    position: table 1 supplies the semester and academic year, table 2 the
    student details, and table 3 the course rows.
    NOTE(review): this assumes every input follows that exact layout; a
    document with fewer tables, rows or cells raises IndexError.

    Args:
        file_obj: whatever the Gradio "file" input passes in; it is handed
            unchanged to ``docx.Document``.

    Returns:
        A multi-line string with the header fields followed by one line per
        course pair, or a "Problem!" message for a row with empty cells.
    """
    tables = Document(file_obj).tables

    term_cells = tables[1].rows[1].cells
    semester = term_cells[0].text
    academic_year = term_cells[1].text

    info_rows = tables[2].rows
    first_cells = info_rows[0].cells
    second_cells = info_rows[1].cells
    third_cells = info_rows[2].cells
    student_name = first_cells[2].text
    transfer_college = first_cells[4].text
    current_college = second_cells[2].text
    major = second_cells[4].text
    student_id = third_cells[2].text
    national_id = third_cells[4].text

    courses = _format_courses(_read_course_rows(tables[3]))

    header = (
        f"Academic year: {academic_year}\n"
        f"Semester: {semester}\n"
        f"Student name: {student_name}\n"
        f"Current college: {current_college}\n"
        f"College to transfer to: {transfer_college}\n"
        f"Major: {major}\n"
        f"Student ID:{student_id}\n"
        f"National ID: {national_id}"
    )
    return header + "\n\nCourses Equivalence validty:" + courses


demo = gr.Interface(docparser, "file", "text")
demo.launch()