File size: 1,460 Bytes
97ec4dd
c6a7684
bca32bb
4086d0d
c6a7684
 
97ec4dd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2d0abd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97ec4dd
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
import os

# Bootstrap the runtime environment at start-up, before the third-party
# imports further down are attempted. Commands run in order through the
# shell; their exit codes are not inspected.
for _setup_command in (
    "pip install -U transformers==3.0.0",
    "pip install nltk torch docx2txt",
    "python -m nltk.downloader punkt",
):
    os.system(_setup_command)

import gradio as gr
import pandas as pd
from question_generation.pipelines import pipeline
import docx2txt

# Lazily-created question-generation pipeline, shared by every request so the
# model is only loaded once per process instead of once per uploaded file.
_QG_PIPELINE = None


def _get_qg_pipeline():
    """Return the shared question-generation pipeline, building it on first use."""
    global _QG_PIPELINE
    if _QG_PIPELINE is None:
        _QG_PIPELINE = pipeline(
            "question-generation",
            model="valhalla/t5-small-qg-prepend",
            qg_format="prepend",
        )
    return _QG_PIPELINE


def _numbered(entries):
    """Render *entries* as newline-separated text, numbered from 1."""
    return "\n".join(
        "{}. {}".format(number, entry)
        for number, entry in enumerate(entries, start=1)
    )


def process_file(Notes):
    """Generate numbered questions and answers from an uploaded Word document.

    Args:
        Notes: Uploaded file object whose ``name`` attribute is the path
            handed to ``docx2txt.process`` (as supplied by the gradio
            ``"file"`` input). ``None`` is treated as "nothing uploaded".

    Returns:
        A two-element list ``[questions, answers]``. Each element is a single
        string with one ``"<n>. <text>"`` entry per line, numbered from 1 in
        the order the pipeline returned them. Both strings are empty when no
        file was given or the document contains no text.
    """
    # Nothing uploaded: return empty outputs rather than failing on
    # ``None.name``.
    if Notes is None:
        return ["", ""]

    document_text = docx2txt.process(Notes.name)

    # Drop blank lines, including lines that contain only whitespace.
    # NOTE: an earlier draft kept only the lines between an "Outline" heading
    # and the "Summary Learning Points" / "Learning Activity" headings; the
    # whole document is used now.
    content_lines = [line for line in document_text.splitlines() if line.strip()]

    # No text to work with: skip the model call entirely.
    if not content_lines:
        return ["", ""]

    # Each item returned by the pipeline is indexed by "question" and "answer".
    qa_pairs = _get_qg_pipeline()(" ".join(content_lines))
    formatted_questions = _numbered(pair["question"] for pair in qa_pairs)
    formatted_answers = _numbered(pair["answer"] for pair in qa_pairs)
    return [formatted_questions, formatted_answers]

# Web UI: a single file upload feeds process_file, whose two returned strings
# are shown in the "Questions" and "Answers" text boxes respectively.
_output_boxes = [
    gr.Textbox(lines=1, label="Questions"),
    gr.Textbox(lines=1, label="Answers"),
]
io = gr.Interface(process_file, "file", outputs=_output_boxes)
io.launch()