Spaces:

Risdom
/

english2

Sleeping

App Files Files Community

english2 / manuscript_conversion.py

yosuke-i

Upload 11 files

5b07cee verified about 1 year ago

raw

history blame contribute delete

2.78 kB

	import pandas as pd
	import re
	import os
	import tempfile


	# Function that builds the new columns from one manuscript cell
	def process_text(row, column_name):
	    """Split one manuscript cell into its script, question and choice parts.

	    The cell is expected to contain a ``## リスニングスクリプト`` header followed
	    by the script, an optional ``Question:`` section, and a ``##選択肢``
	    section. When the ``##選択肢`` marker is absent but ``【正解選択肢】`` is
	    present, the marker is inserted directly in front of ``【正解選択肢】``
	    before splitting.

	    Args:
	        row: Mapping-like object (e.g. a DataFrame row) indexed by column name.
	            It is read only; the cell value is never written back.
	        column_name: Name of the column whose text is split.

	    Returns:
	        A ``pd.Series`` with the keys ``<column_name>_スクリプト``,
	        ``<column_name>_Question`` and ``<column_name>_選択肢``. All three
	        values are empty strings when the cell is not a string or when the
	        script header or the choice marker cannot be found.
	    """
	    script_start_pattern = r"##\s*リスニングスクリプト"
	    question_start = "Question:"
	    choice_start = "##選択肢"
	    correct_choice = "【正解選択肢】"

	    def build_result(script_text, question_text, choice_text):
	        # Single place that defines the names of the generated columns.
	        return pd.Series({
	            f'{column_name}_スクリプト': script_text,
	            f'{column_name}_Question': question_text,
	            f'{column_name}_選択肢': choice_text
	        })

	    text = row[column_name]

	    # A non-string cell (e.g. the NaN pandas uses for an empty CSV cell)
	    # cannot be searched with ``in``; treat it as "no sections present".
	    if not isinstance(text, str):
	        return build_result("", "", "")

	    # Insert the choice marker right before the correct-choice tag when the
	    # manuscript omitted it, so the split below still finds the choices.
	    if correct_choice in text and choice_start not in text:
	        text = text.replace(correct_choice, choice_start + correct_choice)

	    # Everything after the first script header (up to a second header, if
	    # any) is the part that holds script, question and choices.
	    script_parts = re.split(script_start_pattern, text, flags=re.IGNORECASE)
	    if len(script_parts) < 2:
	        return build_result("", "", "")

	    script_text, marker, choice_text = script_parts[1].partition(choice_start)
	    if not marker:
	        return build_result("", "", "")

	    script_text = script_text.strip()
	    choice_text = choice_text.strip()

	    # The question, when present, is the tail of the script section. Only
	    # the script side is stripped; the question text is returned as found.
	    script_text, _, question_text = script_text.partition(question_start)
	    return build_result(script_text.strip(), question_text, choice_text)

	# Names of the columns whose text gets split into separate columns.
	columns_to_process = [
	    '問題1',
	    '問題2',
	    '日本語訳_問題1',
	    '日本語訳_問題2',
	]


	def manuscript_conversion(csv_file):
	    """Convert a manuscript CSV into a CSV with the sections split out.

	    Every column listed in ``columns_to_process`` is passed row by row
	    through ``process_text``; the columns it returns are joined onto the
	    frame and the original columns are then dropped.

	    Args:
	        csv_file: Path to the input CSV (``str`` or ``os.PathLike``), or an
	            object whose ``name`` attribute holds that path.

	    Returns:
	        Path of the written ``output.csv``. The file lives in a temporary
	        directory created for this call, so results of separate calls do
	        not overwrite each other.
	    """
	    # A path-like is used directly; anything else is expected to expose the
	    # path through ``.name``. (``Path.name`` is only the basename, so paths
	    # must not go through the attribute lookup.)
	    if isinstance(csv_file, (str, os.PathLike)):
	        csv_path = csv_file
	    else:
	        csv_path = csv_file.name

	    df = pd.read_csv(csv_path)

	    # Apply the splitter to each source column and append the new columns.
	    for column in columns_to_process:
	        extracted = df.apply(
	            lambda row, column=column: process_text(row, column),
	            axis=1,
	        )
	        df = df.join(extracted)

	    # The source columns are replaced by the split-out ones.
	    df = df.drop(columns=columns_to_process)

	    # Write straight to the final name inside a fresh directory: the file is
	    # still called "output.csv", but no rename onto a shared path is needed.
	    output_dir = tempfile.mkdtemp()
	    new_path = os.path.join(output_dir, "output.csv")

	    # Saved as cp932; characters that cannot be encoded are dropped
	    # (errors='ignore') rather than aborting the export.
	    df.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
	    return new_path