# Hugging Face Space: myasumoto / scoringReport
# Space status shown on the page: Runtime error
# File size: 13,129 Bytes
# 304d0e6 (presumably the commit hash of this revision)

from openai import OpenAI
import os
import math
import gradio as gr
import csv
import glob
import re
from bs4 import BeautifulSoup

def calculate(skkey, folder_path, output_path, c0, threshold, t1, s1, s1m, t2, s2, reason1, s3, reason2, s4, c1, c2):
    """Score every ``*.html`` report in a folder and write the results as CSV.

    Each report gets a character-count score and, when at least one grading
    criterion is enabled, up to two content scores obtained from the OpenAI
    chat API. One CSV row is written per report with the columns::

        name, total, student_id, length, count_score,
        score1, reason1, score2, reason2, text

    where ``total = count_score + score1 + score2``.

    Args:
        skkey: OpenAI API key. When blank, the OpenAI client falls back to
            its own default (the ``OPENAI_API_KEY`` environment variable).
        folder_path: Folder containing ``<name>_<student id>_*.html`` files.
        output_path: Destination CSV file. When blank, ``output.csv`` inside
            ``folder_path`` is used.
        c0: If true, the count score is proportional to length/threshold
            (capped at ``s1m`` for over-length texts); otherwise the banded
            scoring with ``t1``/``t2`` is used.
        threshold: Recommended number of characters (must be > 0).
        t1: Deviation from ``threshold`` (in %) that still earns ``s1``.
        s1: Full count score.
        s1m: Upper bound of the count score in proportional mode.
        t2: Deviation from ``threshold`` (in %) that still earns ``s2``.
        s2: Count score for texts within the ``t2`` band.
        reason1: Text of grading criterion 1.
        s3: Maximum score for criterion 1.
        reason2: Text of grading criterion 2.
        s4: Maximum score for criterion 2.
        c1: Whether criterion 1 is enabled.
        c2: Whether criterion 2 is enabled.

    Returns:
        The string ``"Done"``.

    Raises:
        ValueError: If ``threshold`` is not greater than zero.
    """
    if threshold <= 0:
        # The deviation is a ratio to the threshold; 0 would divide by zero.
        raise ValueError("threshold (recommended character count) must be greater than 0")

    # Upper bound on API calls per report; the loop used to retry forever.
    max_attempts = 3

    report_labels = ("点数1", "理由1", "点数2", "理由2")
    # Accept an optional bullet prefix and either a half- or full-width colon.
    field_re = re.compile(r'^\s*[-*・]?\s*(点数1|理由1|点数2|理由2)\s*[:：]\s*(.*)$')
    number_re = re.compile(r'-?\d+(?:\.\d+)?')
    # Pattern extracting the name and the student ID from the file name.
    name_re = re.compile(r'([^_]+)_([^_]+)_.*\.html')

    def count_score(text):
        """Return ``(length, score)`` for the character count of ``text``."""
        text_length = len(text)
        if c0:
            if threshold >= text_length:
                score = s1 * (1 - (threshold - text_length) / float(threshold))
            else:
                score = min(s1 * (1 + (text_length - threshold) / float(threshold)), s1m)
            return text_length, round(score)

        deviation = abs(text_length - threshold) / float(threshold) * 100
        if deviation <= t1:
            return text_length, s1
        if deviation <= t2:
            return text_length, s2
        return text_length, 1

    def extract_text_from_html(html):
        """Return the visible text of an HTML document, tags removed."""
        soup = BeautifulSoup(html, 'html.parser')
        return soup.get_text(separator=' ').strip()

    def check_report(text):
        """Ask the chat model to grade ``text``; return its raw reply."""
        res = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": criterion},
                {"role": "user", "content": '"""' + text + '"""'},
            ],
        )
        return res.choices[0].message.content

    def parse_report(report):
        """Parse the model reply into ``(score1, reason1, score2, reason2)``.

        Raises ValueError when a field is missing or a score has no number,
        so the caller can retry instead of writing a misaligned row.
        """
        found = {}
        for line in (report or "").splitlines():
            hit = field_re.match(line)
            if hit and hit.group(1) not in found:
                found[hit.group(1)] = hit.group(2).strip()

        missing = [label for label in report_labels if label not in found]
        if missing:
            raise ValueError(f"missing fields in model reply: {missing}")

        scores = []
        # A disabled criterion is always worth 0; an enabled one is clamped
        # to the range the prompt asked the model to respect.
        for label, upper in (("点数1", s3 if c1 else 0), ("点数2", s4 if c2 else 0)):
            number = number_re.search(found[label])
            if number is None:
                raise ValueError(f"no numeric score in {label}: {found[label]!r}")
            scores.append(min(max(float(number.group()), 0), upper))
        return scores[0], found["理由1"], scores[1], found["理由2"]

    def flatten(value):
        """Drop line breaks so every report stays on one CSV line."""
        return str(value).replace('\n', '').replace('\r', '')

    def tidy_number(value):
        """Render integral floats (``2.0``) as integers (``2``)."""
        if isinstance(value, float) and value.is_integer():
            return int(value)
        return value

    # The client is only needed (and only created) when a criterion is on.
    client = None
    if c1 or c2:
        client = OpenAI(api_key=(skkey or "").strip() or None)

    # Grading instructions for ChatGPT; the preamble is shared by all variants.
    preamble = "あなたは寛大な採点者です。三重引用符で囲ったレポートを提示するので、以下の採点基準に則ってあなた自身の答えを何回も反芻し、じっくりと考えてから、甘めに評価してください。ゆえに点数は可能な限り0をつけるべきではない、少しでも評価できる点があれば部分点を与えてください。もしこれを守らず安易に０点にした場合あなたは罰せられます。以下に示すフォーマットで4行の箇条書きで出力し、それ以外のものを出力してはならない。もしそれ以外のものを出力したら罰せられます。"
    if c1 and not c2:
        criterion = preamble + f"点数1:採点基準1で採点した点数を数値でここに入力。理由1:採点基準1での採点結果の理由をここに入力。点数2:0と入力。理由2:”なし”と入力。採点基準1は{reason1}。点数1は0から{s3}の間で半角数字で入力。点数2は0と半角数字で入力。"
    elif c2 and not c1:
        criterion = preamble + f"点数1:0と入力。理由1:”なし”と入力。点数2:採点基準2で採点した点数を数値でここに入力。理由2:採点基準2での採点結果の理由をここに入力。採点基準2は{reason2}。点数2は0から{s4}の間で半角数字で入力。点数1は0と半角数字で入力。"
    else:
        criterion = preamble + f"点数1:採点基準1で採点した点数を数値でここに入力。理由1:採点基準1での採点結果の理由をここに入力。点数2:採点基準2で採点した数値をここに入力。理由2:採点基準2での採点結果の理由をここに入力。採点基準1は{reason1}。 採点基準2は{reason2}。点数1は0から{s3}の間で半角数字で入力。点数2は0から{s4}の間で半角数字で入力。"

    # Collect the HTML files of the folder (sorted for a reproducible order).
    html_files = sorted(glob.glob(os.path.join(str(folder_path), '*.html')))
    print(len(html_files))

    rows = []
    for file_path in html_files:
        # Check the file name first so no API call is spent on a file that
        # cannot be attributed to a student.
        file_name = os.path.basename(file_path)
        match = name_re.search(file_name)
        if not match:
            print("Skipped (file name is not <name>_<id>_*.html):", file_name)
            continue
        name = match.group(1)
        student_id = match.group(2)

        with open(file_path, 'r', encoding='utf-8') as file:
            plain_text = extract_text_from_html(file.read())

        # Score the character count.
        count_length, count_points = count_score(plain_text)

        # Grade the content.
        if client is None:
            score1, why1, score2, why2 = 0, "なし", 0, "なし"
        else:
            last_error = None
            for _ in range(max_attempts):
                try:
                    score1, why1, score2, why2 = parse_report(check_report(plain_text))
                    break
                except Exception as e:  # API failure or unparsable reply: retry
                    last_error = e
                    print(f"エラーが発生しました?: {e}")
            else:
                # Give up on this report but keep processing the others; the
                # failure stays visible in the reason columns.
                score1, why1, score2, why2 = 0, f"ERROR: {last_error}", 0, f"ERROR: {last_error}"

        # Sum of the character-count score and the content scores.
        total = count_points + score1 + score2

        rows.append([
            flatten(name),
            tidy_number(total),
            flatten(student_id),
            count_length,
            tidy_number(count_points),
            tidy_number(score1),
            flatten(why1),
            tidy_number(score2),
            flatten(why2),
            flatten(plain_text),
        ])
        print("SID:", student_id)
        print(f"{count_length},{count_points},{score1},{why1},{score2},{why2}")

    # Write the results; csv.writer quotes fields that contain commas.
    output_file = str(output_path).strip() if output_path else ""
    if not output_file:
        output_file = os.path.join(str(folder_path), "output.csv")
    with open(output_file, 'w', newline='', encoding='utf-8-sig') as file:
        csv.writer(file, lineterminator="\n").writerows(rows)

    print('Output saved to', output_file)
    return "Done"

# Gradio front end: collects the grading settings and runs `calculate`.
with gr.Blocks() as demo:
    with gr.Row():
        # Mask the API key so the secret is not shown in clear text.
        skkey = gr.Textbox(label="ChatGPTのAPIキー", type="password")
        folder_path = gr.Textbox(label="Input Folder Path", value=r"C:\Users\maya\Dropbox\東海大学\講義\シティズンシップ\test")
        output_path = gr.Textbox(label="Output File Path", value=r"C:\Users\maya\Dropbox\東海大学\講義\シティズンシップ\test\output.csv")
    # Character-count scoring settings.
    with gr.Row():
        with gr.Column(scale=1, min_width=400):
            c0 = gr.Checkbox(label="片方向割合加算")
            # Minimum is 1: `calculate` divides by this value.
            threshold = gr.Slider(minimum=1, maximum=1000, value=400, label="推奨文字数")
        with gr.Column(scale=1, min_width=200):
            t1 = gr.Slider(minimum=0, maximum=100, value=10, label="満点の文字範囲(%)")
            s1 = gr.Slider(minimum=1, maximum=10, value=2, label="満点の点数")
            s1m = gr.Slider(minimum=1, maximum=10, value=5, label="点数上限")
        with gr.Column(scale=1, min_width=200):
            t2 = gr.Slider(minimum=0, maximum=100, value=50, label="許容の文字範囲(%)")
            s2 = gr.Slider(minimum=1, maximum=20, value=1, label="許容文字数の際の点数")
    # Content grading criteria; each has an enable checkbox, a text and a maximum score.
    with gr.Row():
        with gr.Column(scale=1, min_width=400):
            c1 = gr.Checkbox(label="有効")
            reason1 = gr.Textbox(label="採点基準1", value="社会運動について考察しているかどうか")
            s3 = gr.Slider(minimum=0, maximum=10, value=1, label="採点基準1の点数")
        with gr.Column(scale=1, min_width=400):
            c2 = gr.Checkbox(label="有効")
            reason2 = gr.Textbox(label="採点基準2", value="社会運動の日本での動向が複数の情報源を元に述べられているかどうか")
            s4 = gr.Slider(minimum=0, maximum=10, value=1, label="採点基準2の点数")

    submit_btn = gr.Button("採点")
    # The input order must match the positional parameters of `calculate`.
    submit_btn.click(
        calculate,
        [skkey, folder_path, output_path, c0, threshold, t1, s1, s1m, t2, s2, reason1, s3, reason2, s4, c1, c2]
    )
demo.launch()