Spaces:
Sleeping
Sleeping
import gradio | |
from bs4 import BeautifulSoup | |
import pathlib | |
def show_answer(source_html, answer_tsv) -> str:
    """Look up the recorded answer for every question found in a quiz page.

    The HTML is scanned for ``<div class="field">`` elements.  A field counts
    as a question when it holds at least two ``<div class="ui-radio">``
    options; its text is taken from the first ``<div class="topichtml">``
    inside it.  Each question is then matched against the rows of the answer
    sheet by substring on the question text and on each of its four options.

    Args:
        source_html: Path of the HTML file to scan (passed to ``open``).
        answer_tsv: Tab-separated answer sheet (passed to ``pandas.read_csv``)
            with the columns ``questions``, ``option_1`` .. ``option_4`` and
            ``answer``; its first column is used as the index.

    Returns:
        One entry per question, joined by newlines.  Each entry is the
        1-based question number, a tab, the array of matching ``answer``
        values (``[]`` when nothing matched) and a dashed separator line.

    Raises:
        IndexError: If a field with two or more options has no
            ``topichtml`` element.
        KeyError: If the answer sheet lacks one of the expected columns.
    """
    # Function-scope imports: the file's import header does not provide
    # these names.
    import re

    import pandas as pd

    match_columns = ("questions", "option_1", "option_2", "option_3", "option_4")

    # Explicit encoding so parsing does not depend on the platform default.
    with open(source_html, encoding="utf-8") as fp:
        soup = BeautifulSoup(fp, "html.parser")

    # NOTE(review): this removes *every* "<digits>. " sequence, not only a
    # leading question number.  Kept unchanged because the answer sheet is
    # presumably keyed on text normalised the same way -- confirm before
    # anchoring the pattern.
    numbering = re.compile(r"\d*\. ")

    scraped = []
    for field in soup.find_all("div", {"class": "field"}):
        choices = [
            radio.get_text(strip=True)
            for radio in field.find_all("div", {"class": "ui-radio"})
        ]
        if len(choices) < 2:
            # Not a multiple-choice question.
            continue
        # Pad short questions to four options with the placeholder text
        # (a negative count yields an empty list, so longer lists are kept).
        choices.extend(["から"] * (4 - len(choices)))
        topic = field.find_all("div", {"class": "topichtml"})[0]
        question = numbering.sub("", topic.get_text(strip=True)).strip()
        scraped.append([question] + [choice.strip() for choice in choices[:4]])

    # Read the matched columns as text: a column whose cells all look numeric
    # would otherwise be parsed as numbers and reject the ``.str`` accessor.
    answer_df = pd.read_csv(
        answer_tsv,
        sep="\t",
        index_col=0,
        dtype={column: str for column in match_columns},
    )

    answers = []
    for number, needles in enumerate(scraped, start=1):
        mask = None
        for column, needle in zip(match_columns, needles):
            # regex=False: the scraped text is literal, so characters such as
            # "?" or "(" must not be interpreted as a pattern.
            # na=False: empty cells count as "no match" instead of putting
            # NaN into the boolean mask.
            hit = answer_df[column].str.contains(needle, regex=False, na=False)
            mask = hit if mask is None else mask & hit
        answer = answer_df[mask]["answer"].values
        answers.append(f"{number}\t{str(answer)} \n {'-' * 20}")
    return "\n".join(answers)
# One upload button per show_answer argument, in positional order:
# the quiz HTML first, then the answer TSV.
inputs = [
    gradio.UploadButton(label)
    for label in ("HTMLをアップロード!", "答えのtsvをアップロード!")
]
# The joined answer listing is shown as plain text.
outputs = gradio.Text()

# Wire the uploads to show_answer and start the web UI.
demo = gradio.Interface(
    fn=show_answer,
    inputs=inputs,
    outputs=outputs,
)
demo.launch()