Spaces:

Risdom
/

english2

Runtime error

App Files Files Community

yosuke-i committed on Jun 19

Commit

5b07cee

•

1 Parent(s): f6ebb2e

Upload 11 files

Browse files

Files changed (11) hide show

README.md +4 -5
app.py +182 -1
category.py +31 -0
chatgpt_api.py +35 -0
convert_chukan_fmt_1.py +140 -0
kousei.py +73 -0
manuscript_conversion.py +68 -0
openai.py +12 -0
select_question.py +92 -0
translate.py +30 -0
voice_create.py +113 -0

README.md CHANGED Viewed

@@ -1,13 +1,12 @@
 ---
-title: English2
-emoji: 📈
 colorFrom: green
-colorTo: pink
 sdk: gradio
-sdk_version: 4.36.1
 app_file: app.py
 pinned: false
-license: apache-2.0
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: English
+emoji: 🏢
 colorFrom: green
+colorTo: blue
 sdk: gradio
+sdk_version: 4.27.0
 app_file: app.py
 pinned: false
 ---
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py CHANGED Viewed

	@@ -1 +1,182 @@
1	- ~~ConnectionResetError~~

+import gradio as gr
+import pandas as pd
+import tempfile
+import os
+import re
+from chatgpt_api import get_chatgpt_response
+from voice_create import text_to_speech
+from select_question import create_choice_question
+from manuscript_conversion import manuscript_conversion
+from category import check
+from kousei import kousei2
+from convert_chukan_fmt_1 import convert_chukan_fmt_1
def kousei(csv_file, input_text):
    """Proofread manuscripts with ChatGPT in batches of five and export the result.

    Args:
        csv_file: CSV path (anything ``pd.read_csv`` accepts) that contains the
            ``id`` and ``原稿`` columns.
        input_text: Review instructions entered by the user. A fixed instruction
            describing the expected ``[id]`` response format is appended to it.

    Returns:
        Path of a cp932-encoded ``output.csv`` with the columns ``id``,
        ``contents`` and ``校正結果``.
    """
    batch_size = 5
    prompt_text = input_text + "指摘は、全ての問題において問題がない場合も含めて、必ず全問題、[id]に続けて結果を書くフォーマットで返してください。[id]の後は改行しないで。[id]はリクエストと完全一致の形式で。（必ず[問題ID]が５つ表示されるはずです）指摘方法は、問題ない場合は「問題なし」、指摘がある場合は「問題あり」\n「問題あり」の場合、問題のある箇所を具体的に指摘してください。\n#リスト"
    df = pd.read_csv(csv_file)
    # Ids are compared against the ids echoed back by the model, so use strings.
    df['id'] = df['id'].astype(str)

    # id -> manuscript lookup; the first occurrence wins when an id is duplicated.
    originals = {}
    for row_id, manuscript in zip(df['id'], df['原稿']):
        originals.setdefault(row_id, manuscript)

    rows = []
    for start in range(0, len(df), batch_size):
        batch = df.iloc[start:start + batch_size]
        prompt = prompt_text + "".join(
            f"\n[{row_id}]\n{manuscript}"
            for row_id, manuscript in zip(batch['id'], batch['原稿'])
        )
        # One API call per batch (the original built and sent every prompt twice).
        response = get_chatgpt_response(prompt)
        # The capture group makes re.split return [preamble, id, text, id, text, ...].
        split_response = re.split(r'\[([^\]]+)\]\s*', response)
        for problem_id, correction_result in zip(split_response[1::2], split_response[2::2]):
            rows.append({
                'id': problem_id,
                'contents': originals.get(problem_id, "原稿が見つかりません"),
                '校正結果': correction_result.strip()
            })
    final_results = pd.DataFrame(rows, columns=['id', 'contents', '校正結果'])

    # Write into a private directory so concurrent requests never share a file,
    # while the download keeps the name "output.csv".
    new_path = os.path.join(tempfile.mkdtemp(), "output.csv")
    final_results.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
    return new_path
title = "英語生成ツール"


def _csv_io_row():
    """Create the CSV upload / result download pair used by most tabs.

    Must be called inside an active layout context; the two file widgets are
    placed side by side in a new row.

    Returns:
        Tuple ``(upload, download)`` of ``gr.File`` components.
    """
    with gr.Row():
        upload = gr.File(label="CSVファイルをアップロード")
        # "single" is the documented value; the original "singular" is not a
        # valid ``file_count`` option.
        download = gr.File(label="ダウンロード", file_count="single")
    return upload, download


# Each tab wires one processing function to its own upload/download widgets.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(
        f"""
         # {title}
        """
    )
    # Multiple-choice question generation.
    with gr.Tab("問題生成（選択肢）"):
        with gr.Column():
            gr.Markdown("""
        ## 利用手順
        1. こちらの[マスタ](https://drive.google.com/uc?export=download&id=1VyDBtVrnDUlddmITiXg7ybqyB0CTCPeu)を手元に置く
        2. シート「input」に、生成したい問題パターンを書いてください（赤字の要素は固定。選択肢は可変。適宜行追加OK）
        3. 完成したら、「ファイル＞名前を付けて保存」から「CSV UTF-8（コンマ区切り）(*.csv)」形式で保存
        4. 3のCSVを本サイトにアップロード
        """)
            question_csv, question_result = _csv_io_row()
            gr.Button("問題生成").click(
                create_choice_question,
                inputs=[question_csv],
                outputs=[question_result]
            )
    # Manuscript conversion.
    with gr.Tab("原稿変換"):
        with gr.Column():
            conversion_csv, conversion_result = _csv_io_row()
            gr.Button("変換").click(
                manuscript_conversion,
                inputs=[conversion_csv],
                outputs=[conversion_result]
            )
    # Batch proofreading of answer choices.
    with gr.Tab("校正"):
        with gr.Column():
            kousei_viewpoint = gr.Textbox(label="校正観点を入力してください。",value="英単語習得を目的として、以下2種類の問題を用意しています。\n１．英語の正しい日本語訳を選択する4択問題\n２．日本語の正しい英語訳を選択する4択問題\n「#リスト」の誤答選択肢の中に、正解選択肢の別解になってしまっているもの、または別解とは言えないが紛らわしすぎるものがないか、探して指摘してください。")
            kousei_csv, kousei_result = _csv_io_row()
            gr.Button("校正スタート").click(
                kousei,
                inputs=[kousei_csv, kousei_viewpoint],
                outputs=[kousei_result]
            )
    # Per-row proofreading with a language-specific rule check.
    with gr.Tab("校正2"):
        with gr.Column():
            language_choice = gr.Radio(choices=["英語", "日本語"], label="選択してください",value="英語")
            kousei2_viewpoint = gr.Textbox(label="校正観点を入力してください。",value="If there are any typographical errors, omissions, missing or extra spaces and periods, or grammatical mistakes in the English text, please point them out.")
            kousei2_csv, kousei2_result = _csv_io_row()
            gr.Button("校正スタート").click(
                kousei2,
                inputs=[kousei2_csv, kousei2_viewpoint, language_choice],
                outputs=[kousei2_result]
            )
    # Audio generation.
    with gr.Tab("音声生成"):
        with gr.Column():
            break_choice = gr.Radio(choices=["ブレイクタイム有", "ブレイクタイム無"], label="選択してください",value="ブレイクタイム有")
            with gr.Row():
                voice_csv = gr.File(label="CSVファイルをアップロード")
                voice_button = gr.Button("音声ファイルを生成")
            voice_result = gr.File(label="ダウンロード")
            voice_button.click(fn=text_to_speech, inputs=[voice_csv, break_choice], outputs=[voice_result])
    # Intermediate master generation.
    with gr.Tab("中間マスタ生成（意味理解）"):
        with gr.Column():
            with gr.Row():
                chukan_csv = gr.File(label="CSVアップロード")
                chukan_button = gr.Button("ファイルコンバート")
            chukan_result = gr.File(label="ファイルをダウンロード")
            chukan_button.click(fn=convert_chukan_fmt_1, inputs=[chukan_csv], outputs=[chukan_result])
    # Category classification.
    with gr.Tab("カテゴリ分類"):
        with gr.Column():
            category_viewpoint = gr.Textbox(label="分類観点を入力してください。",value="xxxxxxxに関する内容の場合は「該当あり」と書いてください。")
            category_csv, category_result = _csv_io_row()
            gr.Button("分類スタート").click(
                check,
                inputs=[category_csv, category_viewpoint],
                outputs=[category_result]
            )
demo.launch(share=True)

category.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import gradio as gr
+import pandas as pd
+import tempfile
+import os
+from chatgpt_api import get_chatgpt_response2
+from voice_create import text_to_speech
+from select_question import create_choice_question
+from manuscript_conversion import manuscript_conversion
def check(csv_file, input_text):
    """Classify every manuscript row with ChatGPT and export the result as CSV.

    Args:
        csv_file: CSV path (anything ``pd.read_csv`` accepts) that contains the
            ``id`` and ``原稿`` columns.
        input_text: Classification criterion entered by the user; a fixed
            answer-format instruction is appended to it.

    Returns:
        Path of a cp932-encoded ``output.csv`` holding the input columns plus
        ``prompt`` and ``分類結果``.
    """
    prompt_text = input_text + "該当しない場合は「該当なし」、該当する場合は「該当あり」としてください\n"
    df = pd.read_csv(csv_file)
    df['id'] = df['id'].astype(str)
    # One prompt, and therefore one API call, per row.
    df["prompt"] = prompt_text + df["原稿"]
    df["分類結果"] = df["prompt"].apply(get_chatgpt_response2)

    # Write into a private directory so concurrent requests never share a file,
    # while the download keeps the name "output.csv".
    new_path = os.path.join(tempfile.mkdtemp(), "output.csv")
    df.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
    return new_path

chatgpt_api.py ADDED Viewed

	@@ -0,0 +1,35 @@

+import os
+import requests
# OpenAI API key, read from the environment only. Never commit a real key as a
# fallback value: configure OPENAI_API_KEY as a secret of the deployment and
# revoke any key that has already been published.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY")
def get_chatgpt_response(input_text):
    """Send one user message to the OpenAI chat completions API.

    Args:
        input_text: Prompt sent as the single ``user`` message.

    Returns:
        The text content of the first choice in the response.

    Raises:
        RuntimeError: If no API key is configured.
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If the API does not answer in time.
    """
    if not OPENAI_API_KEY:
        raise RuntimeError("OPENAI_API_KEY is not configured")
    headers = {
        "Content-Type": "application/json",
        "Authorization": f"Bearer {OPENAI_API_KEY}"
    }
    data = {
        "model": "gpt-4o",
        "messages": [{"role": "user", "content": input_text}]
    }
    # (connect, read) timeouts: without them a stalled connection blocks the
    # request forever.
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers,
        json=data,
        timeout=(10, 300),
    )
    # Surface API errors directly instead of failing later with a KeyError on
    # the missing "choices" field.
    response.raise_for_status()
    return response.json()["choices"][0]["message"]["content"]
def get_chatgpt_response2(input_text):
    """Return the ChatGPT answer for ``input_text``.

    Kept as a separate name for existing importers; it performs exactly the
    same request as ``get_chatgpt_response`` and therefore delegates to it
    instead of duplicating the request code.
    """
    return get_chatgpt_response(input_text)

convert_chukan_fmt_1.py ADDED Viewed

	@@ -0,0 +1,140 @@

+import pandas as pd
+import os
+import tempfile
+import re
+import random
+import gradio as gr
+# id作成
def make_id(stage, order):
    """Build the ID string for the given stage and display order.

    Parameters:
    stage (int): stage number
    order (int): order number within the stage

    Returns:
    str: ``EC03ST<stage>L<order>``, both numbers zero-padded to three digits
    """
    return "EC03ST{:03d}L{:03d}".format(stage, order)
def extract_correct_choice(text):
    """Return the rest of the line after the "【正解選択肢】" tag, stripped.

    Returns None when the tag is absent or nothing follows it on that line.
    """
    found = re.search(r'【正解選択肢】(.+)', text)
    return found.group(1).strip() if found else None
+# 選択肢シャッフルの関数
def shuffle_and_combine_options(text_a, text_b):
    """Shuffle two parallel lists of tagged choices with one shared permutation.

    Each text holds choices written as "【tag】choice". The tags are dropped, the
    choices of both texts are reordered with the same random permutation (so
    position i of one result still corresponds to position i of the other) and
    each list is joined with "/".

    Returns:
        Tuple ``(joined_a, joined_b)``.
    """
    tagged_choice = re.compile(r'【[^】]+】([^【]+)')
    cleaned_a = [found.strip() for found in tagged_choice.findall(text_a)]
    cleaned_b = [found.strip() for found in tagged_choice.findall(text_b)]

    # The permutation is sized by the first list and applied to both.
    order = list(range(len(cleaned_a)))
    random.shuffle(order)

    joined_a = "/".join([cleaned_a[position] for position in order])
    joined_b = "/".join([cleaned_b[position] for position in order])
    return joined_a, joined_b
+# 解説作成
def create_explanation(script_english,script_japanese,choices_english, choices_japanese,question_english,question_japanese):
    """Compose the explanation text: an English part followed by its translation.

    Each part contains the script, an optional "Question" section (left out
    when the question is empty or null) and the choices, one per line
    ("/"-separated in the input).
    """
    english_choices = "\n".join(choices_english.split("/"))
    japanese_choices = "\n".join(choices_japanese.split("/"))

    def question_section(question):
        # An empty or missing question contributes nothing to the text.
        if question == "" or pd.isnull(question):
            return ""
        return "\n\nQuestion：\n" + question

    english_question = question_section(question_english)
    japanese_question = question_section(question_japanese)

    english_part = f"■英語\n{script_english}{english_question}\n\n選択肢：\n{english_choices}"
    japanese_part = f"■日本語訳\n{script_japanese}{japanese_question}\n\n選択肢：\n{japanese_choices}"
    return english_part + "\n\n" + japanese_part
def convert_chukan_fmt_1(csv_file):
    """Convert a converted-manuscript CSV into the intermediate master CSV.

    Every input row yields two output rows (question 1 and question 2), each
    with freshly shuffled choices.

    Args:
        csv_file: Uploaded file object whose ``name`` attribute is the CSV path.

    Returns:
        Path of a cp932-encoded ``output.csv``, or None when a row could not be
        converted (a warning naming the failing ID is shown instead).
    """
    df_input = pd.read_csv(csv_file.name)
    output_columns = [
        "問題ID",
        "知識ID",
        "出題形式ID",
        "リード文",
        "問題",
        "問題_翻訳",
        "正解文",
        "解説テキスト",
        "選択肢",
        "正解",
        "script",
        "question",
        "choices",
        "eikenn",
    ]
    data = {column: [] for column in output_columns}

    for _, row in df_input.iterrows():
        knowledge_id = make_id(row["ステージ"], row["ステージ内表示順"])
        try:
            for number in (1, 2):
                english_col = f"問題{number}_選択肢"
                japanese_col = f"日本語訳_問題{number}_選択肢"
                script_col = f"問題{number}_スクリプト"
                question_col = f"問題{number}_Question"
                # Both languages are shuffled with the same permutation so the
                # choices stay aligned.
                japanese_selection, english_selection = shuffle_and_combine_options(
                    row[japanese_col], row[english_col]
                )
                record = {
                    "問題ID": f"{knowledge_id}Q{number:03d}",
                    "知識ID": knowledge_id,
                    "出題形式ID": row["出題形式ID"],
                    "リード文": row["リード文"],
                    "問題": "",
                    "問題_翻訳": extract_correct_choice(row[japanese_col]),
                    "正解文": extract_correct_choice(row[english_col]),
                    "解説テキスト": create_explanation(
                        row[script_col],
                        row[f"日本語訳_問題{number}_スクリプト"],
                        english_selection,
                        japanese_selection,
                        row[question_col],
                        row[f"日本語訳_問題{number}_Question"],
                    ),
                    "選択肢": english_selection,
                    "正解": extract_correct_choice(row[english_col]),
                    "script": row[script_col],
                    "question": row[question_col],
                    # Choices are only read aloud when the row asks for it.
                    "choices": english_selection if row["選択肢読み上げ有無"] == "有" else "",
                    "eikenn": row["レベル"],
                }
                # Append only complete records so all columns keep equal length.
                for column, value in record.items():
                    data[column].append(value)
        except Exception as e:
            # Report the failing row to the user. Return None rather than the
            # message: the result feeds a file output, where a message string
            # would be treated as a (non-existent) file path.
            gr.Warning(f"次の問題でエラーが発生: {knowledge_id} - {str(e)}")
            return None

    # Write into a private directory so concurrent requests never share a file,
    # while the download keeps the name "output.csv".
    new_path = os.path.join(tempfile.mkdtemp(), "output.csv")
    pd.DataFrame(data).to_csv(new_path, index=False, encoding='cp932', errors='ignore')
    return new_path

kousei.py ADDED Viewed

	@@ -0,0 +1,73 @@

+import gradio as gr
+import pandas as pd
+import tempfile
+import os
+import re
+from chatgpt_api import get_chatgpt_response2
+from voice_create import text_to_speech
+from select_question import create_choice_question
+from manuscript_conversion import manuscript_conversion
def check_text(text, radio_option):
    """Run rule-based punctuation and spacing checks on each sentence of ``text``.

    The text is split after sentence punctuation (and at newlines). With
    ``radio_option == "日本語"`` each sentence must end with "。" or "！".
    Otherwise each sentence is checked for runs of two or more spaces and must
    end with ".", "!" or "?".

    Returns:
        A report listing the triggered checks and their details, or a fixed
        success message when nothing was found.
    """
    summary = []
    details = []
    japanese_mode = radio_option == "日本語"

    # The lookbehind keeps the punctuation attached to the preceding sentence.
    for piece in re.split(r'(?<=[。！？?!.])\s*|\n', text):
        sentence = piece.strip()
        if sentence == "":
            continue

        if japanese_mode:
            if re.search(r'[。！]$', sentence) is None:
                summary.append("文末に句点がありません。")
                details.append(f"文末に「。」または「！」がない: '{sentence}'")
            continue

        # Runs of two or more half-width spaces.
        if re.search(r'  +', sentence) is not None:
            summary.append("半角スペースが2つ以上入っています。")
            details.extend(
                f"スペースが２つある部分: '{fragment.strip()}'"
                for fragment in re.findall(r'[^ ]*  +[^ ]*', sentence)
            )
        # Missing terminal punctuation.
        if re.search(r'[.!?]$', sentence) is None:
            summary.append("文末にピリオドや?や!のいずれかがついていません。")
            details.append(f"文末に「.」または「!」または「?」がない: '{sentence}'")

    if not summary:
        return "全てのチェックをクリアしました。"
    return "チェック観点:\n" + "\n".join(summary) + "\n\n詳細:\n" + "\n".join(details)
def kousei2(csv_file, input_text,radio_option):
    """Proofread each manuscript row with ChatGPT and with the rule-based checker.

    Args:
        csv_file: CSV path (anything ``pd.read_csv`` accepts) that contains the
            ``id`` and ``原稿`` columns.
        input_text: Proofreading instructions entered by the user.
        radio_option: Language selector forwarded to ``check_text``.

    Returns:
        Path of a cp932-encoded ``output.csv`` holding the input columns plus
        ``prompt``, ``GPT校正結果`` and ``タイプミス校正結果``.
    """
    prompt_text = "#Instructions\n" + input_text +" If there is no problem, please reply with only 2 letters 'OK' and DON'T put any other extra words. \n #Target sentence\n"
    df = pd.read_csv(csv_file)
    df['id'] = df['id'].astype(str)
    # One prompt, and therefore one API call, per row.
    df["prompt"] = prompt_text + df["原稿"]
    df["GPT校正結果"] = df["prompt"].apply(get_chatgpt_response2)
    df["タイプミス校正結果"] = df["原稿"].apply(check_text, args=(radio_option,))

    # Write into a private directory so concurrent requests never share a file,
    # while the download keeps the name "output.csv".
    new_path = os.path.join(tempfile.mkdtemp(), "output.csv")
    df.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
    return new_path

manuscript_conversion.py ADDED Viewed

	@@ -0,0 +1,68 @@

+import pandas as pd
+import re
+import os
+import tempfile
+# 新しいカラムを作成する関数
def process_text(row, column_name):
    """Split one generated question into script, question and choices.

    The text after the "## リスニングスクリプト" heading is divided at
    "##選択肢" into the script and the choices. If the script contains
    "Question:", the part after it becomes the question. When the choices
    heading is missing but "【正解選択肢】" is present, the heading is inserted
    in front of that tag first.

    Args:
        row: Mapping-like row; only ``row[column_name]`` is read, never written.
        column_name: Name of the column that holds the question text.

    Returns:
        ``pd.Series`` with the keys ``<column>_スクリプト``,
        ``<column>_Question`` and ``<column>_選択肢``. All three are empty
        strings when the cell is not text or a required section is missing.
    """
    script_start_pattern = r"##\s*リスニングスクリプト"
    question_start = "Question:"
    choice_start = "##選択肢"
    correct_choice = "【正解選択肢】"

    def build(script_text, question_text, choice_text):
        return pd.Series({
            f'{column_name}_スクリプト': script_text,
            f'{column_name}_Question': question_text,
            f'{column_name}_選択肢': choice_text
        })

    text = row[column_name]
    # Empty CSV cells arrive as NaN (float); treat them like a missing section
    # instead of failing with a TypeError.
    if not isinstance(text, str):
        return build("", "", "")

    # Insert the choices heading right before the correct-choice tag if absent.
    if correct_choice in text and choice_start not in text:
        text = text.replace(correct_choice, choice_start + correct_choice)

    script_parts = re.split(script_start_pattern, text, flags=re.IGNORECASE)
    if len(script_parts) < 2 or choice_start not in script_parts[1]:
        return build("", "", "")

    script_text, choice_text = script_parts[1].split(choice_start, 1)
    script_text = script_text.strip()
    head, marker, tail = script_text.partition(question_start)
    if marker:
        return build(head.strip(), tail, choice_text.strip())
    return build(script_text, "", choice_text.strip())
# Columns whose text is split into script / question / choices.
columns_to_process = ['問題1', '問題2', '日本語訳_問題1', '日本語訳_問題2']


def manuscript_conversion(csv_file):
    """Split the generated question columns of a CSV into their sections.

    For every column in ``columns_to_process`` the three columns returned by
    ``process_text`` are added, then the original combined columns are dropped.

    Args:
        csv_file: Uploaded file object whose ``name`` attribute is the CSV path.

    Returns:
        Path of a cp932-encoded ``output.csv``.
    """
    df = pd.read_csv(csv_file.name)
    for column in columns_to_process:
        df = df.join(df.apply(process_text, axis=1, args=(column,)))
    # The combined source columns are no longer needed once they are split.
    df.drop(columns=columns_to_process, inplace=True)

    # Write into a private directory so concurrent requests never share a file,
    # while the download keeps the name "output.csv". Characters that cp932
    # cannot encode are dropped.
    new_path = os.path.join(tempfile.mkdtemp(), "output.csv")
    df.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
    return new_path

openai.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# from openai import OpenAI
+# client = OpenAI()
+# def exec_gpt4(prompt):
+#     openai.api_key = os.environ["OPENAI_API_KEY"]  # key redacted: never commit credentials, read them from the environment
+#     response = client.chat.completions.create(
+#         model='gpt-4o',
+#         messages=[
+#             {"role": "user", "content": prompt},
+#         ],
+#     )
+#     return response.choices[0].message.content.strip()

select_question.py ADDED Viewed

	@@ -0,0 +1,92 @@

+from chatgpt_api import get_chatgpt_response
+import pandas as pd
+import re
+import tempfile
+import os
+from translate import translate_text
+# プロンプトを作成
def make_prompt(df):
    """Build one generation prompt per (input row, theme) pair.

    Rows whose "テーマ" cell is empty (NaN) are skipped; otherwise the cell is
    split on "," and a prompt is produced for each theme. An extra instruction
    about question variety is added when the sample contains "Question".

    Returns:
        DataFrame with the columns 形式, レベル, テーマ, 選択肢読み上げ有無,
        複製パターン and プロンプト.
    """
    records = []
    for _, source in df.iterrows():
        raw_themes = str(source["テーマ"])
        if raw_themes.lower() == 'nan':
            continue

        if "Question" in source["サンプル"]:
            question_rule = "・Questionがある場合は、「##リスニングスクリプト」の表現を真似しすぎず、what,why,how,when,whereなど、多様な角度から問う\n"
        else:
            question_rule = ""

        for theme in raw_themes.split(','):
            count = str(int(source["問題数"]))
            prompt = "".join([
                "#条件\n",
                "・以下の「#サンプル」の形式（[問題1]##リスニングスクリプト ... ##選択肢...[問題2]##リスニングスクリプト...##選択肢...）で、",
                source['レベル'],
                "レベルのリスニング問題を生成\n",
                "・[問題1]から[問題", count, "]まで必ず##リスニングスクリプトを", count, "種類作成\n",
                "・「##リスニングスクリプト」から「##選択肢」まですべて出力\n",
                "・「##リスニングスクリプト」は", source['スクリプトパターン'], "\n",
                "・問題の難易度、word数は「##リスニングスクリプト」と同等レベル\n",
                question_rule,
                "・各問題は、[問題1][問題2]という形式で始める\n",
                "・ただし、スクリプトの内容は、", theme, "に関するスクリプト\n",
                "・[問題1]から[問題", count, "]までの誤答選択肢はすべて異なる内容・異なるパターン \n \n",
                "#サンプル\n",
                source["サンプル"],
            ])
            print(prompt)
            records.append({
                "形式": source["形式2"],
                "レベル": source["レベル"],
                "テーマ": theme,
                "選択肢読み上げ有無": source["選択肢読み上げ有無"],
                "複製パターン": source["複製パターン"],
                "プロンプト": prompt
            })
    return pd.DataFrame(records)
+# 問題の分割と展開
def expand_problems(df):
    """Split each generated "問題1" text into one row per "[問題N]" section.

    The metadata columns of the source row are copied onto every resulting row
    and the section text (without its "[問題N]" label) is stored in "問題1".
    """
    section_pattern = re.compile(r'\[(問題\d+)\](.*?)(?=\[問題\d+\]|$)', re.DOTALL)
    carried_columns = ("形式", "レベル", "テーマ", "選択肢読み上げ有無", "複製パターン")

    expanded = []
    for _, source in df.iterrows():
        generated = source['問題1']
        print("返却値", generated)
        # Each match is (label, body); only the body is kept.
        for _label, body in section_pattern.findall(generated):
            record = {column: source[column] for column in carried_columns}
            record["問題1"] = body.strip()
            expanded.append(record)
    return pd.DataFrame(expanded)
def create_choice_question(csv_file):
    """Generate listening questions from a pattern CSV and export them as CSV.

    Steps: build prompts, generate question set 1 with ChatGPT, split it into
    individual questions, generate question 2 from each question and its
    duplication pattern, then translate both.

    Args:
        csv_file: Uploaded file object whose ``name`` attribute is the CSV path.

    Returns:
        Path of a cp932-encoded ``output.csv``.
    """
    df = pd.read_csv(csv_file.name)
    df_prompt = make_prompt(df)

    # Question 1: one API call per prompt, then one row per generated question.
    df_prompt["問題1"] = df_prompt["プロンプト"].apply(get_chatgpt_response)
    result_df = expand_problems(df_prompt)

    # Question 2: derived from question 1 according to the duplication pattern.
    result_df["プロンプト2"] = result_df["複製パターン"]+"\n"+result_df["問題1"]
    result_df["問題2"] = result_df["プロンプト2"].apply(get_chatgpt_response)

    # Translations of both questions.
    result_df["翻訳_問題1"] = result_df["問題1"].apply(translate_text)
    result_df["翻訳_問題2"] = result_df["問題2"].apply(translate_text)

    # Write into a private directory so concurrent requests never share a file,
    # while the download keeps the name "output.csv". Characters that cp932
    # cannot encode are dropped.
    new_path = os.path.join(tempfile.mkdtemp(), "output.csv")
    result_df.to_csv(new_path, index=False, encoding='cp932', errors='ignore')
    return new_path

translate.py ADDED Viewed

	@@ -0,0 +1,30 @@

+import requests
+import json
def translate_text(text, target_lang='ja', source_lang='en'):
    """Translate ``text`` with the Google Cloud Translation v2 REST API.

    Line breaks are sent as ``<br>`` (with ``format=html``) and restored in the
    result, so the line structure survives translation.

    Args:
        text: Text to translate.
        target_lang: Target language code.
        source_lang: Source language code.

    Returns:
        The translated text, or ``"Error: <response body>"`` when the API does
        not answer with status 200.

    Raises:
        RuntimeError: If the ``GOOGLE_API_KEY`` environment variable is not set.
    """
    # Local imports: this module does not import them at file level.
    import html
    import os

    # The key must come from the environment, never from source code.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError("GOOGLE_API_KEY environment variable is not set")

    payload = {
        'q': text.replace('\n', '<br>'),
        'source': source_lang,
        'target': target_lang,
        'format': 'html'
    }
    headers = {
        'Content-Type': 'application/json'
    }
    response = requests.post(
        "https://translation.googleapis.com/language/translate/v2",
        params={'key': api_key},
        headers=headers,
        json=payload,
        timeout=60,
    )
    if response.status_code != 200:
        return "Error: " + response.text

    translated_text = response.json()['data']['translations'][0]['translatedText']
    # Restore the line breaks, then decode the HTML entities (&#39;, &quot;, ...)
    # that the API emits for format=html.
    translated_text = html.unescape(translated_text.replace('<br>', '\n'))
    # Make sure the choices heading and the correct-choice tag are on separate lines.
    return translated_text.replace("##選択肢【正解選択肢】", "##選択肢\n【正解選択肢】")

voice_create.py ADDED Viewed

	@@ -0,0 +1,113 @@

+import pandas as pd
+import zipfile
+import base64
+import os
+import requests
+import re
def remove_quotes(text):
    """Return ``text`` with every double-quote character removed."""
    return "".join(text.split('"'))
# Abbreviations rewritten before periods are turned into pauses, so their dots
# do not produce a break in the middle of a sentence.
_SPOKEN_FORMS = (
    ("a.m.", 'AM'),
    ("p.m.", 'PM'),
    ("U.S.", 'US'),
    ("U.K.", 'UK'),
    ("Mr.", 'Mister'),
    ("Ms.", 'MIZ'),
    ("Mrs.", 'Misiz'),
    ("Dr.", 'Doctor'),
    ("Mt.", 'Mount'),
)


def _build_ssml(row, selected_option):
    """Build the SSML document for one CSV row (dialogue, question, choices)."""
    script = row.get('script', '')
    if not isinstance(script, str):
        script = str(script)
    # The capture group keeps the speaker labels:
    # [preamble, "A:", text, "B:", text, ...]
    parts = re.split(r'(A:|B:)', script)
    ssml_parts = []
    for speaker, utterance in zip(parts[1::2], parts[2::2]):
        voice_name = row["voiceA"] if speaker == "A:" else row["voiceB"]
        text = remove_quotes(utterance.strip())
        for written, spoken in _SPOKEN_FORMS:
            text = text.replace(written, spoken)
        # A line break becomes a 1s pause, a period a 500ms pause.
        text = text.replace("\n", '<break time="1s"/>')
        text = text.replace(".", '.<break time="500ms"/>')
        if selected_option == "ブレイクタイム有":
            # For the listed levels a comma becomes a short pause.
            if row["eikenn"] in ["5級","4級","3級","準2級","2級","準1級"]:
                text = text.replace(",", '<break time="50ms"/>')
        ssml_parts.append(f'<voice name="{voice_name}"><prosody rate="{row["speed"]}"><p>{text}</p></prosody></voice>')

    ssml = '<speak>' + ''.join(ssml_parts)
    if pd.notna(row.get('question')) and row['question'] != '':
        ssml += f'<break time="1s"/><voice name="{row["voiceQuestion"]}"><prosody rate="{row["speed"]}"><p>Question</p><break time="1s"/><p>{row["question"]}</p></prosody></voice>'
    if pd.notna(row.get('choices')) and row['choices'] != '':
        choices_ssml = '<break time="1s"/>'.join(row['choices'].split('/'))
        ssml += f'<break time="1s"/><voice name="{row["voiceB"]}"><prosody rate="{row["speed"]}"><p>{choices_ssml}</p></prosody></voice>'
    return ssml + '</speak>'


def text_to_speech(input_file,selected_option):
    """Synthesize one MP3 per CSV row with Google Text-to-Speech and zip them.

    Columns read from each row: ``id``, ``script``, ``voiceA``, ``voiceB``,
    ``speed``, ``eikenn`` and optionally ``question`` (with ``voiceQuestion``)
    and ``choices``.

    Args:
        input_file: CSV path (anything ``pd.read_csv`` accepts).
        selected_option: "ブレイクタイム有" inserts short pauses at commas for
            the supported levels; any other value leaves commas untouched.

    Returns:
        Path of ``output_audio_files.zip`` containing ``<id>.mp3`` for every
        row the API returned audio for.

    Raises:
        RuntimeError: If the ``GOOGLE_API_KEY`` environment variable is not set.
    """
    # Local import: this module does not import tempfile at file level.
    import tempfile

    # The key must come from the environment, never from source code.
    api_key = os.environ.get("GOOGLE_API_KEY")
    if not api_key:
        raise RuntimeError("GOOGLE_API_KEY environment variable is not set")

    data = pd.read_csv(input_file)
    # A private directory per call keeps concurrent requests from overwriting
    # each other's archive.
    zip_path = os.path.join(tempfile.mkdtemp(), 'output_audio_files.zip')
    headers = {
        "X-Goog-Api-Key": api_key,
        "Content-Type": "application/json"
    }
    url = "https://texttospeech.googleapis.com/v1/text:synthesize"

    with zipfile.ZipFile(zip_path, 'w') as z:
        for _, row in data.iterrows():
            body = {
                "input": {"ssml": _build_ssml(row, selected_option)},
                # Base language; the voices themselves are named inside the SSML.
                "voice": {"languageCode": "en-US"},
                "audioConfig": {"audioEncoding": "MP3"}
            }
            response = requests.post(url, headers=headers, json=body, timeout=60)
            response_data = response.json()
            if 'audioContent' in response_data:
                # Store the audio straight in the archive; no temporary file in
                # the working directory is needed.
                z.writestr(f"{row['id']}.mp3", base64.b64decode(response_data['audioContent']))
            else:
                print("ファイル不備")
                print("レスポンスデータ",response_data)
    return zip_path