Spaces:

Risdom
/

english2

Sleeping

App Files Files Community

english2 / convert_chukan_fmt_1.py

yosuke-i

Update convert_chukan_fmt_1.py

ae281dd verified 12 months ago

raw

history blame contribute delete

6.66 kB

	import pandas as pd
	import os
	import tempfile
	import re
	import random
	import gradio as gr


	# Build an ID
	def make_id(stage, order):
	    """Build the ID string for the given stage and order numbers.

	    Integer-valued floats (for example ``1.0``, as produced when a numeric
	    column is held as floats) are converted to ``int`` before formatting,
	    because the ``03d`` format code rejects floats. Every other value is
	    handed to the format code unchanged.

	    Parameters:
	        stage (int): Stage number.
	        order (int): Order number.

	    Returns:
	        str: ID of the form ``EC03ST<stage>L<order>``, with each number
	        zero-padded to at least three digits.

	    Raises:
	        ValueError: If a value cannot be formatted with ``03d`` (for example
	        NaN, a non-integral float, or a string).
	    """
	    def _integral(value):
	        # Only whole-number floats are coerced; NaN and fractions fall through
	        # so the format code below still rejects them.
	        if isinstance(value, float) and value.is_integer():
	            return int(value)
	        return value

	    # Generate the ID according to the fixed format
	    return f"EC03ST{_integral(stage):03d}L{_integral(order):03d}"

	def extract_correct_choice(text):
	    """Return the text that follows the "【正解選択肢】" tag.

	    The captured text ends at the next "【" (the start of another tag) or at
	    the end of the line, whichever comes first, so further tagged choices on
	    the same line are not included in the result.

	    Parameters:
	        text (str): Text that may contain a "【正解選択肢】" tag.

	    Returns:
	        str | None: The captured text with surrounding whitespace stripped,
	        or None when the tag is absent or nothing follows it.
	    """
	    # "【" is excluded so a following tag is not swallowed; "\n" is excluded
	    # to keep the capture on the tag's own line.
	    match = re.search(r'【正解選択肢】([^【\n]+)', text)
	    if match is None:
	        return None
	    return match.group(1).strip()

	# Shuffle the answer options
	def shuffle_and_combine_options(text_a, text_b):
	    """Shuffle two parallel option lists with the same random permutation.

	    Each text is split into options by taking the text after every
	    "【...】" tag up to the next "【"; the tags themselves are discarded and
	    each option is stripped. The i-th option of ``text_a`` stays paired with
	    the i-th option of ``text_b`` after shuffling.

	    Parameters:
	        text_a (str): Tagged option text.
	        text_b (str): Tagged option text with the same number of options.

	    Returns:
	        tuple[str, str]: The shuffled options of ``text_a`` and ``text_b``,
	        each joined with "/".

	    Raises:
	        ValueError: If the two texts contain a different number of options.
	    """
	    def extract_and_clean_options(text):
	        # Capture everything between one tag and the next, without the tag.
	        return [found.strip() for found in re.findall(r'【[^】]+】([^【]+)', text)]

	    options_a = extract_and_clean_options(text_a)
	    options_b = extract_and_clean_options(text_b)

	    # A count mismatch would otherwise surface as an opaque IndexError or
	    # silently drop options, so report it explicitly.
	    if len(options_a) != len(options_b):
	        raise ValueError(
	            f"選択肢の数が一致しません: {len(options_a)} 件 / {len(options_b)} 件"
	        )

	    # Shuffle the pairs so both lists receive the same permutation.
	    pairs = list(zip(options_a, options_b))
	    random.shuffle(pairs)

	    # Join each side with "/"
	    result_a = "/".join(first for first, _ in pairs)
	    result_b = "/".join(second for _, second in pairs)

	    return result_a, result_b

	# Build the explanation text
	def create_explanation(script_english,script_japanese,choices_english, choices_japanese,question_english,question_japanese):
	    """Assemble the two-part (English / Japanese translation) explanation text.

	    Each part consists of a heading line, the script, an optional
	    "Question：" paragraph, and a "選択肢：" paragraph listing the choices one
	    per line. The choices arrive as "/"-separated strings.

	    Parameters:
	        script_english: English script text.
	        script_japanese: Japanese translation of the script.
	        choices_english (str): "/"-separated English choices.
	        choices_japanese (str): "/"-separated Japanese choices.
	        question_english: English question; omitted when "" or null.
	        question_japanese: Japanese question; omitted when "" or null.

	    Returns:
	        str: The combined explanation text.
	    """
	    def _one_per_line(choices):
	        # Turn the "/" separators into line breaks.
	        return "\n".join(choices.split("/"))

	    def _question_paragraph(question):
	        # An empty or null question contributes nothing to the output.
	        is_blank = question == "" or pd.isnull(question)
	        if is_blank:
	            return ""
	        return "\n\nQuestion：\n" + question

	    def _part(heading, script, question, choices):
	        return f"{heading}\n{script}{question}\n\n選択肢：\n{choices}"

	    english_lines = _one_per_line(choices_english)
	    japanese_lines = _one_per_line(choices_japanese)
	    english_question = _question_paragraph(question_english)
	    japanese_question = _question_paragraph(question_japanese)

	    english_part = _part("■英語", script_english, english_question, english_lines)
	    japanese_part = _part("■日本語訳", script_japanese, japanese_question, japanese_lines)
	    return english_part + "\n\n" + japanese_part


	# Make line breaks explicit
	def replace_newlines_in_text(text):
	    """Replace each newline character with the two characters ``\\n``.

	    Parameters:
	        text: Value to convert. Anything that is not a ``str`` is returned
	            unchanged.

	    Returns:
	        The converted string, or ``text`` itself when it is not a ``str``.
	    """
	    if not isinstance(text, str):
	        return text
	    # Re-join the lines with a literal backslash followed by "n".
	    return '\\n'.join(text.split('\n'))


	def _append_question(data, row, knowledge_id, number):
	    """Append the output values for question ``number`` (1 or 2) of ``row`` to ``data``."""
	    choices_column = f"問題{number}_選択肢"
	    script_column = f"問題{number}_スクリプト"
	    question_column = f"問題{number}_Question"

	    japanese_selection, english_selection = shuffle_and_combine_options(
	        row[f"日本語訳_{choices_column}"], row[choices_column]
	    )
	    data["問題ID"].append(f"{knowledge_id}Q{number:03d}")
	    data["知識ID"].append(knowledge_id)
	    data["出題形式ID"].append(row["出題形式ID"])
	    data["リード文"].append(row["リード文"])
	    data["問題"].append("")
	    data["問題_翻訳"].append(extract_correct_choice(row[f"日本語訳_{choices_column}"]))
	    data["正解文"].append(extract_correct_choice(row[choices_column]))
	    data["解説テキスト"].append(
	        replace_newlines_in_text(
	            create_explanation(
	                row[script_column],
	                row[f"日本語訳_{script_column}"],
	                english_selection,
	                japanese_selection,
	                row[question_column],
	                row[f"日本語訳_{question_column}"],
	            )
	        )
	    )
	    data["選択肢"].append(english_selection)
	    data["正解"].append(extract_correct_choice(row[choices_column]))
	    # Only the first question of a row carries the script in the list view.
	    data["リスト表示"].append(replace_newlines_in_text(row[script_column]) if number == 1 else "")
	    data["script"].append(row[script_column])
	    data["question"].append(row[question_column])
	    data["choices"].append(english_selection if row["選択肢読み上げ有無"] == "有" else "")
	    data["eikenn"].append(row["レベル"])


	def convert_chukan_fmt_1(csv_file):
	    """Convert an input CSV into the intermediate-format CSV.

	    Every input row yields two output rows, one for question 1 and one for
	    question 2, whose IDs are the row's ID followed by "Q001" and "Q002".
	    The choices of each question are shuffled before being written.

	    Parameters:
	        csv_file: Object whose ``name`` attribute is the path of the input
	            CSV (presumably a Gradio file upload - confirm against caller).

	    Returns:
	        str: Path of the written ``output.csv`` (cp932-encoded; characters
	        that cannot be encoded are dropped). If processing a row raises, a
	        Gradio warning is issued and the error message string is returned
	        instead of a path.
	    """
	    df_input = pd.read_csv(csv_file.name)
	    columns = (
	        "問題ID",
	        "知識ID",
	        "出題形式ID",
	        "リード文",
	        "問題",
	        "問題_翻訳",
	        "正解文",
	        "解説テキスト",
	        "選択肢",
	        "正解",
	        "リスト表示",
	        "script",
	        "question",
	        "choices",
	        "eikenn",
	    )
	    data = {column: [] for column in columns}

	    for _, row in df_input.iterrows():
	        knowledge_id = make_id(row["ステージ"], row["ステージ内表示順"])
	        try:
	            print(knowledge_id)
	            for number in (1, 2):
	                _append_question(data, row, knowledge_id, number)
	        except Exception as e:
	            # Boundary handler: report the failing ID to the user and stop.
	            message = f"次の問題でエラーが発生: {knowledge_id} - {str(e)}"
	            gr.Warning(message)
	            return message

	    df_output = pd.DataFrame(data)

	    # Write into a fresh directory so the file can keep the fixed name
	    # "output.csv" without concurrent conversions overwriting each other.
	    # The directory is not removed here; the caller still needs the file.
	    output_dir = tempfile.mkdtemp()
	    new_path = os.path.join(output_dir, "output.csv")
	    df_output.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
	    return new_path