english2 / kousei.py
yosuke-i's picture
Upload 11 files
5b07cee verified
import gradio as gr
import pandas as pd
import tempfile
import os
import re
from chatgpt_api import get_chatgpt_response2
from voice_create import text_to_speech
from select_question import create_choice_question
from manuscript_conversion import manuscript_conversion
def check_text(text: str, radio_option: str) -> str:
    """Run simple typo checks on every sentence of *text*.

    The text is split into sentences after sentence-ending punctuation and
    on newlines; blank pieces are skipped.

    * When ``radio_option`` is ``"日本語"``, each sentence must end with
      ``。`` or ``!``.
    * For any other ``radio_option``, each sentence must not contain a run
      of two or more half-width spaces and must end with ``.``, ``!`` or
      ``?``.

    Returns:
        A report listing the failed checks followed by per-sentence
        details, or a fixed "all checks passed" message when nothing was
        flagged.
    """
    errors = []
    error_details = []

    # Split after sentence-ending punctuation (the punctuation stays with
    # the preceding sentence) and on newlines.
    sentences = re.split(r'(?<=[。!??!.])\s*|\n', text)

    for sentence in sentences:
        sentence = sentence.strip()
        if not sentence:
            continue

        if radio_option == "日本語":
            if not re.search(r'[。!]$', sentence):
                errors.append("文末に句点がありません。")
                error_details.append(f"文末に「。」または「!」がない: '{sentence}'")
        else:
            # Flag only runs of two or more spaces; a single space is the
            # normal word separator and must not be reported.
            if re.search(r' {2,}', sentence):
                errors.append("半角スペースが2つ以上入っています。")
                # Report each run together with its neighbouring words.
                for part in re.findall(r'[^ ]* {2,}[^ ]*', sentence):
                    error_details.append(f"スペースが2つある部分: '{part.strip()}'")

            # The sentence must end with terminal punctuation.
            if not re.search(r'[.!?]$', sentence):
                errors.append("文末にピリオドや?や!のいずれかがついていません。")
                error_details.append(f"文末に「.」または「!」または「?」がない: '{sentence}'")

    if errors:
        return "チェック観点:\n" + "\n".join(errors) + "\n\n詳細:\n" + "\n".join(error_details)
    return "全てのチェックをクリアしました。"
def kousei2(csv_file, input_text, radio_option):
    """Proofread every manuscript row of a CSV and write the results to a new CSV.

    The input CSV is read with ``pd.read_csv`` and must contain the columns
    ``id`` and ``原稿``. Three columns are added:

    * ``prompt`` - the instruction text built from ``input_text`` followed
      by the row's manuscript,
    * ``GPT校正結果`` - the value returned by ``get_chatgpt_response2`` for
      that prompt,
    * ``タイプミス校正結果`` - the report from ``check_text`` for the
      manuscript, using ``radio_option``.

    Args:
        csv_file: Anything ``pd.read_csv`` accepts as its first argument.
        input_text: Proofreading instructions embedded in each prompt.
        radio_option: Language selector forwarded to ``check_text``.

    Returns:
        Path of the written file, named ``output.csv`` and located in the
        system temporary directory. The file is encoded as cp932;
        characters that cannot be encoded are dropped (``errors='ignore'``).
    """
    prompt_text = (
        "#Instructions\n"
        + input_text
        + " If there is no problem, please reply with only 2 letters 'OK' and DON'T put any other extra words. \n #Target sentence\n"
    )

    # Load the CSV file.
    df = pd.read_csv(csv_file)

    # Convert the 'id' column to strings.
    df['id'] = df['id'].astype(str)

    df["prompt"] = prompt_text + df["原稿"]
    df["GPT校正結果"] = df["prompt"].apply(get_chatgpt_response2)
    print("radio_option", radio_option)
    df["タイプミス校正結果"] = df["原稿"].apply(check_text, args=(radio_option,))

    # Reserve a unique temporary path, then write once the handle is closed.
    with tempfile.NamedTemporaryFile(delete=False, suffix='.csv') as tmp:
        output_path = tmp.name
    df.to_csv(output_path, index=False, encoding='cp932', errors='ignore')

    # Move the finished file to its fixed name. os.replace overwrites an
    # existing output.csv on every platform, whereas os.rename raises
    # FileExistsError on Windows when the destination already exists.
    new_path = os.path.join(os.path.dirname(output_path), "output.csv")
    os.replace(output_path, new_path)
    return new_path