File size: 2,069 Bytes
3dbda0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
import gradio
from bs4 import BeautifulSoup
import pathlib

def show_answer(source_html, answer_tsv):
    """Look up the recorded answer for every multiple-choice question in a page.

    The HTML page is scanned for ``<div class="field">`` elements.  Each field
    that contains at least two ``<div class="ui-radio">`` options is treated
    as a question; its text is taken from the first ``<div class="topichtml">``
    inside the field.  Every question is then searched for in the answer table
    by literal substring match on the question text and on each of its four
    options, and the ``answer`` values of all matching rows are reported.

    Args:
        source_html: Path to the HTML file, or an upload object exposing the
            path as ``.name`` (NOTE(review): the exact value Gradio passes for
            an ``UploadButton`` depends on the Gradio version -- confirm).
        answer_tsv: Path (or upload object) of a tab-separated file whose
            first column is the index and which has the columns
            ``questions``, ``option_1`` .. ``option_4`` and ``answer``.

    Returns:
        One entry per question, joined by newlines.  Each entry is the 1-based
        question number, a tab, the array of matching answers, and a line of
        twenty dashes.

    Raises:
        IndexError: If a field has options but no ``topichtml`` element.
        KeyError: If the answer table lacks one of the required columns.
    """
    import os
    import re

    import pandas as pd

    def _as_path(upload):
        # Plain paths pass through untouched (a ``pathlib.Path`` also has a
        # ``.name`` attribute, but that is only the final path component).
        if isinstance(upload, (str, bytes, os.PathLike)):
            return upload
        return getattr(upload, "name", upload)

    # Binary mode lets BeautifulSoup detect the document encoding itself
    # instead of relying on the platform's default text encoding.
    with open(_as_path(source_html), "rb") as fp:
        soup = BeautifulSoup(fp, "html.parser")

    # NOTE(review): this removes every "<digits>. " occurrence, not only a
    # leading question number.  Kept as-is because the answer table was
    # presumably normalised the same way -- confirm before tightening.
    number_prefix = re.compile(r"\d*\. ")

    # One (question, option_1, option_2, option_3, option_4) tuple per question.
    parsed = []
    for field in soup.find_all("div", {"class": "field"}):
        choices = [
            node.get_text(strip=True)
            for node in field.find_all("div", {"class": "ui-radio"})
        ]
        if len(choices) < 2:
            # Not a multiple-choice question.
            continue
        # Pad to four options with the same placeholder the original used.
        choices += ["から"] * (4 - len(choices))
        title = field.find_all("div", {"class": "topichtml"})[0].get_text(strip=True)
        question = number_prefix.sub("", title).strip()
        parsed.append((question, *(choice.strip() for choice in choices[:4])))

    answer_df = pd.read_csv(_as_path(answer_tsv), sep="\t", index_col=0)
    columns = ("questions", "option_1", "option_2", "option_3", "option_4")
    # Normalise once, outside the loop: missing cells become "" and numeric
    # cells become text, so the ``.str`` accessor is always usable.
    haystacks = [answer_df[column].fillna("").astype(str) for column in columns]
    recorded = answer_df["answer"].values

    lines = []
    for number, needles in enumerate(parsed, start=1):
        mask = None
        for haystack, needle in zip(haystacks, needles):
            # regex=False: scraped text is data, not a pattern.  Characters
            # such as "(", "?" or "+" must be matched literally.
            hit = haystack.str.contains(needle, regex=False).to_numpy(dtype=bool)
            mask = hit if mask is None else mask & hit
        answer = recorded[mask]
        lines.append(f"{number}\t{str(answer)} \n {'-'*20}")
    return "\n".join(lines)

# Two upload buttons feed show_answer: the question page first, then the
# answer table.  The labels are user-facing and shown in the UI as written.
inputs = [
    gradio.UploadButton(label)
    for label in ("HTMLをアップロード!", "答えのtsvをアップロード!")
]

# The joined answer listing is rendered in a plain text box.
outputs = gradio.Text()

# Build the interface and start serving it as soon as the module runs.
demo = gradio.Interface(show_answer, inputs, outputs)
demo.launch()