"""Gradio app that looks up known answers for the questions in an HTML page.

The user uploads an HTML file containing multiple-choice questions and a
tab-separated file of known answers.  Every question found in the HTML is
matched against the TSV by substring on the question text and its four
options, and the matching ``answer`` values are shown as text.
"""

import pathlib
import re

import gradio
import pandas as pd
from bs4 import BeautifulSoup

# Columns of the answer TSV that a scraped question is matched against, in
# the same order as the tuples produced by ``_parse_questions``.
_MATCH_COLUMNS = ("questions", "option_1", "option_2", "option_3", "option_4")

# Every question is compared on exactly this many options.
_OPTION_COUNT = 4

# Filler used when a question offers fewer than ``_OPTION_COUNT`` options.
_PAD_OPTION = "から"

# Strips question numbering such as "12. ".
# NOTE(review): the pattern is unanchored and ``\d*`` may match nothing, so
# every ". " inside the question text is removed as well.  It is kept as is
# because the TSV text is presumably normalised the same way - confirm.
_NUMBERING = re.compile(r"\d*\. ")

# Line printed after each question's result.
_SEPARATOR = "-" * 20


def _parse_questions(source_html):
    """Extract the multiple-choice questions from an HTML file.

    Args:
        source_html: Path of the HTML file to read.

    Returns:
        A list with one ``(question, option_1, option_2, option_3,
        option_4)`` tuple of stripped strings per ``div.field`` element that
        has at least two ``div.ui-radio`` options.

    Raises:
        IndexError: If such a field has no ``div.topichtml`` element.
    """
    # Binary mode lets BeautifulSoup detect the document encoding instead of
    # relying on the platform's default text encoding.
    with open(source_html, "rb") as fp:
        soup = BeautifulSoup(fp, "html.parser")

    rows = []
    for field in soup.find_all("div", {"class": "field"}):
        choices = [
            choice.get_text(strip=True)
            for choice in field.find_all("div", {"class": "ui-radio"})
        ]
        if len(choices) < 2:
            # Not a multiple-choice question.
            continue
        # Pad short option lists; a negative count adds nothing.
        choices += [_PAD_OPTION] * (_OPTION_COUNT - len(choices))

        title = field.find_all("div", {"class": "topichtml"})[0]
        question = _NUMBERING.sub("", title.get_text(strip=True)).strip()
        rows.append(
            (question, *(choice.strip() for choice in choices[:_OPTION_COUNT]))
        )
    return rows


def _match_mask(answer_df, row):
    """Return a boolean mask of the ``answer_df`` rows that contain ``row``.

    A row matches when each of its ``_MATCH_COLUMNS`` cells contains the
    corresponding text of ``row`` as a literal substring.
    """
    mask = None
    for column, text in zip(_MATCH_COLUMNS, row):
        # regex=False: scraped text is literal, not a pattern, so characters
        # such as "(" or "?" must not be interpreted.  na=False: empty cells
        # simply do not match instead of poisoning the mask with NaN.
        hit = answer_df[column].str.contains(text, regex=False, na=False)
        mask = hit if mask is None else mask & hit
    return mask


def show_answer(source_html, answer_tsv):
    """Look up the answer of every question in ``source_html``.

    Args:
        source_html: Path of the HTML file containing the questions.
        answer_tsv: Tab-separated file (path or file object accepted by
            ``pandas.read_csv``) whose first column is the index and which
            has the columns ``questions``, ``option_1`` .. ``option_4`` and
            ``answer``.

    Returns:
        One entry per question, joined by newlines.  Each entry is the
        1-based question number, a tab, the array of matching ``answer``
        values (empty when nothing matched) and a separator line.
    """
    rows = _parse_questions(source_html)

    # Read the matched columns as text so that purely numeric options are
    # still compared as strings; the answer column keeps its inferred type.
    answer_df = pd.read_csv(
        answer_tsv,
        sep="\t",
        index_col=0,
        dtype={column: str for column in _MATCH_COLUMNS},
    )

    entries = []
    for number, row in enumerate(rows, start=1):
        found = answer_df[_match_mask(answer_df, row)]["answer"].values
        entries.append(f"{number}\t{str(found)} \n {_SEPARATOR}")
    return "\n".join(entries)


# Two upload buttons feed show_answer; its result is shown as plain text.
inputs = [
    gradio.UploadButton("HTMLをアップロード!"),
    gradio.UploadButton("答えのtsvをアップロード!"),
]
outputs = gradio.Text()

demo = gradio.Interface(fn=show_answer, inputs=inputs, outputs=outputs)
demo.launch()