Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
| import hashlib, tempfile, os, time | |
| from datetime import datetime, timezone | |
| import sqlite3 | |
| import random | |
| # 假设这些模块在其他地方定义 | |
| from config import CSS, DIMS | |
| from OVAL import oval_scores | |
| from DeepEval import deepeval_scores | |
# Global configuration
DAILY_LIMIT = 150 # Global (not per-user) cap on requests per day
REQUEST_INTERVAL = 9 # Minimum interval between requests (seconds)
DB_FILE = "usage_tracker.db" # SQLite database file name
def init_db():
    """Create the usage-tracking table and seed its single counter row.

    Opens the SQLite database at ``DB_FILE``, creates the ``global_stats``
    table when it is missing, and guarantees that the row with ``id = 1``
    exists. That is the row every other helper in this module reads and
    updates. Calling the function repeatedly is harmless.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        # Global counter table: one row holding today's date, the request
        # count and the timestamp of the most recent request.
        conn.execute('''
            CREATE TABLE IF NOT EXISTS global_stats (
                id INTEGER PRIMARY KEY,
                date TEXT NOT NULL,
                count INTEGER NOT NULL,
                last_request REAL NOT NULL
            )
        ''')
        # Pin the primary key to 1 so the "WHERE id = 1" queries used by the
        # other helpers always address this row; OR IGNORE keeps an existing
        # row untouched.
        conn.execute(
            "INSERT OR IGNORE INTO global_stats (id, date, count, last_request) "
            "VALUES (1, ?, ?, ?)",
            (get_utc_date(), 0, time.time()),
        )
        conn.commit()
    finally:
        # Always release the connection, even if a statement above failed.
        conn.close()
def get_utc_date():
    """Return the current UTC calendar date formatted as ``YYYY-MM-DD``."""
    utc_now = datetime.now(tz=timezone.utc)
    return utc_now.date().isoformat()
def check_daily_limit():
    """Report whether today's global request quota has been used up.

    Reads the counter row (``id = 1``) from ``global_stats``. When the stored
    date differs from the current UTC date, the date is advanced and the
    counter is reset to zero. When the row is missing it is created.

    Returns:
        tuple[bool, int]: ``(is_limited, count)`` where ``is_limited`` is
        ``True`` once ``count`` has reached ``DAILY_LIMIT``.
    """
    today = get_utc_date()
    conn = sqlite3.connect(DB_FILE)
    try:
        row = conn.execute(
            "SELECT date, count FROM global_stats WHERE id = 1"
        ).fetchone()
        if row is None:
            # Counter row missing: create it with an explicit id so the
            # "WHERE id = 1" lookups keep finding it.
            conn.execute(
                "INSERT INTO global_stats (id, date, count, last_request) "
                "VALUES (1, ?, ?, ?)",
                (today, 0, time.time()),
            )
            count = 0
        else:
            db_date, count = row
            if db_date != today:
                # New UTC day: reset the counter. last_request is left alone
                # on purpose. No request happened here, and stamping it with
                # "now" would make the first submission of the day fail the
                # request-interval check.
                conn.execute(
                    "UPDATE global_stats SET date = ?, count = 0 WHERE id = 1",
                    (today,),
                )
                count = 0
        conn.commit()
    finally:
        # Release the connection even when a statement raises.
        conn.close()
    return count >= DAILY_LIMIT, count
def update_request_count():
    """Record one accepted request in the global counter row.

    Increments today's count, or restarts it at 1 when the stored date is not
    the current UTC date or the row is missing. Stamps ``last_request`` with
    the current time in every case.

    Returns:
        tuple[int, float]: the updated count for today and the timestamp that
        was written to ``last_request``.
    """
    today = get_utc_date()
    current_time = time.time()
    conn = sqlite3.connect(DB_FILE)
    try:
        row = conn.execute(
            "SELECT date, count FROM global_stats WHERE id = 1"
        ).fetchone()
        if row is None:
            # Counter row missing: create it with an explicit id so later
            # "WHERE id = 1" statements address it.
            conn.execute(
                "INSERT INTO global_stats (id, date, count, last_request) "
                "VALUES (1, ?, ?, ?)",
                (today, 1, current_time),
            )
            count = 1
        elif row[0] != today:
            # First request of a new UTC day: restart the counter at 1.
            conn.execute(
                "UPDATE global_stats SET date = ?, count = 1, last_request = ? "
                "WHERE id = 1",
                (today, current_time),
            )
            count = 1
        else:
            # Same day: bump the counter in SQL and mirror it locally.
            conn.execute(
                "UPDATE global_stats SET count = count + 1, last_request = ? "
                "WHERE id = 1",
                (current_time,),
            )
            count = row[1] + 1
        conn.commit()
    finally:
        # Release the connection even when a statement raises.
        conn.close()
    return count, current_time
def check_request_interval():
    """Tell whether enough time has passed since the last recorded request.

    Returns:
        bool: ``True`` when at least ``REQUEST_INTERVAL`` seconds have elapsed
        since ``last_request`` of the counter row, or when that row does not
        exist; ``False`` otherwise.
    """
    conn = sqlite3.connect(DB_FILE)
    try:
        row = conn.execute(
            "SELECT last_request FROM global_stats WHERE id = 1"
        ).fetchone()
    finally:
        # Close on every path; the missing-row early exit used to skip this.
        conn.close()
    if row is None:
        return True  # No record yet, so allow the request.
    return time.time() - row[0] >= REQUEST_INTERVAL
def generate_captcha():
    """Build a random addition captcha.

    Returns:
        tuple[str, int]: the question text and the expected numeric answer.
        Both operands are drawn from 2..8 inclusive.
    """
    operands = [random.randint(2, 8) for _ in range(2)]
    question = "What's {} + {}?".format(*operands)
    return question, sum(operands)
def make_explanation(system: str, dimension: str, score: float) -> str:
    """Compose the canned low-score message for one automated dimension.

    Args:
        system: Name of the scoring system shown at the start of the message.
        dimension: Dimension name; selects the advice text.
        score: The score to quote in the message.

    Returns:
        ``"<system> scored <dimension> at <score>: <advice>"``. Dimensions
        without a dedicated entry get a generic advice sentence.
    """
    advice_by_dimension = {
        # OVAL extension: 5 dimensions
        "Structural Clarity": "The text structure may be unclear; consider adding headings or breaking into paragraphs.",
        "Reasoning Quality": "Argument support is weak; consider adding logical reasoning or evidence.",
        "Factuality": "Information may be inaccurate; please fact-check the facts.",
        "Depth of Analysis": "Analysis seems shallow; add more insights or examples.",
        "Topic Coverage": "Key aspects may be missing; ensure you cover the full scope.",
        # DeepEval extension: 5 dimensions
        "Fluency": "Expression may be disfluent; consider smoothing sentence transitions.",
        "Prompt Relevance": "The response may stray from the prompt; ensure alignment.",
        "Conciseness": "The response may be verbose; consider trimming redundant parts.",
        "Readability": "The text is hard to read; consider simpler wording or shorter sentences.",
        "Engagement": "The response lacks engagement; add examples or a conversational tone.",
    }
    advice = advice_by_dimension.get(
        dimension, "Low score detected; please review this aspect."
    )
    return f"{system} scored {dimension} at {score}: {advice}"
def evaluate(
    prompt_text: str,
    output_text: str,
    # Subjective prompt scores (5 dimensions)
    s1: float, s2: float, s3: float, s4: float, s5: float,
    # Explanations for the subjective prompt scores
    e1: str, e2: str, e3: str, e4: str, e5: str,
    # Judge module
    judge_llm: str,
    ja1: float, ja2: float, ja3: float, ja4: float, ja5: float,
    judge_remark: str,
    # Extra notes
    remark: str,
    # Captcha
    captcha_answer: str,
    correct_answer: int,
    # Session state
    session_state: dict
):
    """Handle a Submit click: throttle, validate, score and build the results.

    Steps, in order:
      1. Stop early (without raising) when the global daily limit is reached.
      2. Reject the request if it arrives too soon after the previous one or
         if the captcha answer is missing or wrong.
      3. Count the request, then require an explanation for every subjective
         score below 3.
      4. Run the OVAL and DeepEval scorers, blank out the dimensions disabled
         in this demo, and build the tables, radar chart and messages.

    Args:
        prompt_text: The prompt being evaluated.
        output_text: The model response being evaluated.
        s1..s5: Subjective prompt scores.
        e1..e5: Explanations matching ``s1..s5``.
        judge_llm, ja1..ja5, judge_remark: LLM-as-a-judge inputs; echoed back
            to the result widgets.
        remark: Free-form notes; echoed back to the result widgets.
        captcha_answer: Text the user typed for the captcha.
        correct_answer: Expected captcha result.
        session_state: Per-session state dict (not used by this function).

    Returns:
        A 26-tuple in the order of the ``outputs`` list wired to
        ``submit.click``: limit notice, results area, 19 result widgets,
        captcha text, captcha answer state, hidden daily counter, judge
        section state, footer counter.

    Raises:
        gr.Error: On throttling, captcha failure or a missing explanation.
    """
    # 1) Check the global request state.
    is_limited, current_count = check_daily_limit()

    if is_limited:
        limit_text = f"Today Counts: {current_count}/{DAILY_LIMIT}"
        # One value per wired output, in order. The 19 result widgets are
        # cleared. The captcha question and its expected answer are kept as
        # they are, so the form is not left with a blank or mismatched
        # captcha.
        return (
            gr.update(visible=True),    # show the limit notice
            gr.update(visible=False),   # hide the results area
            *([None] * 19),             # subj_tbl ... judge_remarks_out
            gr.update(),                # captcha_text: unchanged
            correct_answer,             # captcha answer state: unchanged
            limit_text,                 # hidden daily counter
            "expanded",                 # judge section state (same as success path)
            gr.update(value=limit_text),  # footer counter
        )

    # Enforce the minimum interval between requests.
    if not check_request_interval():
        conn = sqlite3.connect(DB_FILE)
        try:
            row = conn.execute(
                "SELECT last_request FROM global_stats WHERE id = 1"
            ).fetchone()
        finally:
            conn.close()
        elapsed = time.time() - row[0] if row else REQUEST_INTERVAL
        # Clamp at zero: the interval may run out between the check above and
        # this read, and a negative wait time would confuse the user.
        remaining_time = max(0.0, REQUEST_INTERVAL - elapsed)
        raise gr.Error(f"请等待 {remaining_time:.1f} 秒后再试")

    # Verify the captcha. Only the conversion sits inside the try block.
    try:
        typed_answer = int(captcha_answer)
    except (ValueError, TypeError):
        raise gr.Error("Please enter the correct verification code")
    if typed_answer != correct_answer:
        raise gr.Error("Verification code error, please try again")

    # 2) Count this request against the global quota.
    count, _ = update_request_count()

    # 3) Every subjective score below 3 must come with an explanation.
    for score, exp, label in [
        (s1, e1, "Clarity"),
        (s2, e2, "Scope Definition"),
        (s3, e3, "Intent Alignment"),
        (s4, e4, "Bias / Induction"),
        (s5, e5, "Efficiency"),
    ]:
        if score < 3 and not (exp or "").strip():
            raise gr.Error(f"{label} score < 3: please provide an explanation.")

    # 4) Build the three score columns.
    subj = [s1, s2, s3, s4, s5] + [None] * 10

    # Copy into fresh lists so blanking entries below never mutates whatever
    # object the scorers returned.
    # NOTE(review): assumes both scorers return 15 values aligned with DIMS;
    # confirm against OVAL / DeepEval.
    oval = list(oval_scores(output_text))
    deep = list(deepeval_scores(prompt_text, output_text))

    # Grey out the dimensions disabled in this demo by setting them to None.
    oval[7] = None   # Factuality
    oval[9] = None   # Topic Coverage
    deep[11] = None  # Prompt Relevance
    deep[12] = None  # Conciseness

    # 5) Automatic explanations for low automated scores.
    auto_expls = []
    for system, scores, idxs in [
        ("OVAL", oval, range(5, 10)),
        ("DeepEval", deep, range(10, 15)),
    ]:
        for i in idxs:
            sc = scores[i]
            if sc is not None and sc < 3:
                auto_expls.append(make_explanation(system, DIMS[i], sc))
    auto_text = "\n".join(auto_expls) or "All automated scores ≥ 3; no issues detected."

    # 6) Full DataFrame, including the judge columns repeated on every row.
    n_dims = len(DIMS)
    full_df = pd.DataFrame({
        "Dimension": DIMS,
        "Subjective (Prompt)": subj,
        "OVAL (Output)": oval,
        "DeepEval (Output)": deep,
        "Judge LLM": [judge_llm] * n_dims,
        "Sensory Accuracy": [ja1] * n_dims,
        "Emotional Engagement": [ja2] * n_dims,
        "Flow & Naturalness": [ja3] * n_dims,
        "Imagery Completeness": [ja4] * n_dims,
        "Simplicity & Accessibility": [ja5] * n_dims,
        "Judge Remarks": [judge_remark] * n_dims,
        "Notes (Slang/Tech Terms)": [remark] * n_dims,
    })

    # 7) Per-system sub-tables.
    subj_df = full_df.iloc[0:5][["Dimension", "Subjective (Prompt)"]]
    oval_df = full_df.iloc[5:10][["Dimension", "OVAL (Output)"]]
    deep_df = full_df.iloc[10:15][["Dimension", "DeepEval (Output)"]]

    # 8) Radar chart: per dimension, the maximum of the available scores
    #    (0 when every source is None).
    max_scores = [
        max([v for v in vals if v is not None]) if any(v is not None for v in vals) else 0
        for vals in zip(subj, oval, deep)
    ]
    # Repeat the first point so the polygon closes.
    closed_dims = DIMS + [DIMS[0]]
    r = max_scores + [max_scores[0]]
    fig = go.Figure(go.Scatterpolar(r=r, theta=closed_dims, fill='toself'))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 5])),
        showlegend=False,
        title="Final (Max) Scores Radar"
    )

    # Same text for the hidden counter and the footer, matching the format
    # the page-load handler writes into both widgets.
    counter_text = f"Today Counts: {count}/{DAILY_LIMIT}"
    return (
        gr.update(visible=False),   # hide the limit notice
        gr.update(visible=True),    # show the results area
        subj_df,
        oval_df,
        deep_df,
        fig,
        None,                       # no CSV file is generated
        remark,
        e1, e2, e3, e4, e5,
        auto_text,
        judge_llm,
        ja1, ja2, ja3, ja4, ja5,
        judge_remark,
        *generate_captcha(),        # fresh captcha question and answer
        counter_text,               # hidden daily counter
        "expanded",                 # judge section defaults to expanded
        gr.update(value=counter_text)  # footer counter
    )
def toggle_explain(v):
    """Return an update that shows the target component only while ``v`` is below 3."""
    needs_explanation = v < 3
    return gr.update(visible=needs_explanation)
def check_daily_limit_state():
    """Check the global quota and produce the matching UI updates.

    Returns:
        A 5-tuple for the page-load handler: limit-notice visibility, submit
        button visibility, results-area visibility, the counter text, and an
        update carrying the same text for the footer counter.
    """
    limited, used = check_daily_limit()
    allowed = not limited
    counter_text = f"Today Counts: {used}/{DAILY_LIMIT}"
    return (
        gr.update(visible=limited),        # limit notice
        gr.update(visible=allowed),        # submit button
        gr.update(visible=allowed),        # results area
        counter_text,                      # counter text
        gr.update(value=counter_text),     # footer counter
    )
def show_personal_version_notice():
    """Always raise a ``gr.Error`` saying the feature is reserved for the personal version."""
    notice = "Only for coming personal version."
    raise gr.Error(notice)
def toggle_judge_section(visible):
    """Map the judge-section state to UI updates.

    Args:
        visible: State string; ``"expanded"`` means the section is shown.

    Returns:
        Two updates: the section's visibility, and the toggle button's label
        ("Collapse" while expanded, "Expand" otherwise).
    """
    expanded = visible == "expanded"
    if expanded:
        button_label = "Collapse"
    else:
        button_label = "Expand"
    return gr.update(visible=expanded), gr.update(value=button_label)
# Stylesheet passed to gr.Blocks(css=css) below. It styles the orange submit
# button (#submit-btn), the notice boxes (.limit-notice, .upgrade-notice,
# .welcome-notice), the usage counter (.daily-count) and the judge section.
# NOTE(review): this lowercase `css` is separate from the `CSS` name imported
# from config, which this listing never uses.
css = """
#submit-btn {
background-color: orange !important;
color: white !important;
border: none !important;
}
#submit-btn:hover {
background-color: darkorange !important;
}
.limit-notice {
background-color: #ffcccc;
border: 1px solid #ff6666;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
.upgrade-notice {
background-color: #e6f7ff;
border: 1px solid #91d5ff;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
.welcome-notice {
background-color: #fff7e6;
border: 1px solid #ffd591;
padding: 10px;
border-radius: 5px;
margin: 10px 0;
}
.disabled-dimension {
color: #888;
font-style: italic;
}
.example-label {
font-weight: bold;
color: #666;
margin-top: 10px;
}
.daily-count {
font-size: 16px;
font-weight: bold;
margin-top: 15px;
text-align: center;
}
.judge-section {
border: 1px solid #ddd;
border-radius: 5px;
margin-top: 10px;
}
.judge-header {
cursor: pointer;
padding: 10px;
background-color: #f5f5f5;
display: flex;
justify-content: space-between;
align-items: center;
}
.judge-content {
padding: 10px;
}
"""
# Initialise the database (runs at import time, before the UI is built).
init_db()
# Build the Gradio UI and wire its events.
# NOTE(review): the nesting of components inside the Row/Column containers
# below was reconstructed from a listing that lost its indentation; confirm
# the layout against the deployed app.
with gr.Blocks(css=css) as iface:
    # Session state
    session_state = gr.State({})
    judge_section_state = gr.State("expanded") # starts expanded
    # Welcome banner and limitation notice at the top
    gr.Markdown("""
<div class="welcome-notice">
<h3>👋 Hey there! You're using the ECHOscore demo.</h3>
<p>It's a lighter version with limited features.</p>
<p>For the full power, grab the desktop version(coming soon)!</p>
</div>
""")
    # Daily-limit notice (hidden initially)
    limit_notice = gr.Markdown("""
<div class="limit-notice">
<h3>⚠️ Oops! Daily limit reached.</h3>
<p>Tomorrow’s a new day — or skip the wait with desktop version (coming soon)!</p>
</div>
""", visible=False)
    gr.Markdown("# ECHOscore – Prompt vs Output Evaluation")
    # Current usage (hidden textbox; the visible counter is footer_count)
    # NOTE(review): the literal 150 in the initial value duplicates DAILY_LIMIT.
    daily_count = gr.Textbox(label="Daily Counts", value="Today Counts: 0/150", interactive=False, visible=False)
    with gr.Row():
        prompt_in = gr.Textbox(lines=4, label="Input (Prompt)")
        output_in = gr.Textbox(lines=4, label="Output (Model Response)")
    # verification code
    captcha_text = gr.Textbox(label="verification code", interactive=False)
    captcha_answer = gr.Textbox(label="Please enter the calculation result", placeholder="Verification code answer")
    correct_answer = gr.State(8) # placeholder; replaced when the page loads
    with gr.Row():
        s1 = gr.Slider(0,5,0,step=0.1, label="Prompt – Clarity")
        s2 = gr.Slider(0,5,0,step=0.1, label="Prompt – Scope Definition")
        s3 = gr.Slider(0,5,0,step=0.1, label="Prompt – Intent Alignment")
        s4 = gr.Slider(0,5,0,step=0.1, label="Prompt – Bias / Induction")
        s5 = gr.Slider(0,5,0,step=0.1, label="Prompt – Efficiency")
    # Explanation boxes; each is revealed by toggle_explain when its slider is below 3
    e1 = gr.Textbox(lines=2, label="Explain Clarity (<3)", visible=False)
    e2 = gr.Textbox(lines=2, label="Explain Scope Definition (<3)", visible=False)
    e3 = gr.Textbox(lines=2, label="Explain Intent Alignment (<3)", visible=False)
    e4 = gr.Textbox(lines=2, label="Explain Bias / Induction (<3)", visible=False)
    e5 = gr.Textbox(lines=2, label="Explain Efficiency (<3)", visible=False)
    remark = gr.Textbox(lines=2, label="Internet slang & technical terms notes (optional)")
    # Judge module - collapsible / expandable
    with gr.Row():
        with gr.Column(scale=12):
            judge_header = gr.Markdown("""
<div class="judge-header">
<span>LLM-as-a-Judge (optional)</span>
</div>
""")
        # The toggle button and its column are both created hidden.
        with gr.Column(scale=1, visible=False):
            toggle_judge_btn = gr.Button("Collapse", visible=False)
    with gr.Row(visible=True) as judge_section:
        judge_llm = gr.Textbox(lines=1, label="LLM-as-a-Judge (optional-Place the NAME of LLM)")
        gr.Markdown("**LLM Scoring Examples**", elem_classes="example-label")
        # Judge scores are read-only in this demo (interactive=False).
        ja1 = gr.Number(label="Sensory Accuracy (only for desktop version)", value=0, precision=1, step=0.1, interactive=False)
        ja2 = gr.Number(label="Emotional Engagement (only for desktop version)", value=0, precision=1, step=0.1, interactive=False)
        ja3 = gr.Number(label="Flow & Naturalness (only for desktop version)", value=0, precision=1, step=0.1, interactive=False)
        ja4 = gr.Number(label="Imagery Completeness (only for desktop version)", value=0, precision=1, step=0.1, interactive=False)
        ja5 = gr.Number(label="Simplicity & Accessibility (only for desktop version)", value=0, precision=1, step=0.1, interactive=False)
        judge_remark = gr.Textbox(lines=2, label="Judge Remarks (only for desktop version)", interactive=True)
    # Upgrade notice
    gr.Markdown("""
<div class="upgrade-notice">
<h3>🔝 Unlock Full Features</h3>
<p>Get access to all dimensions and unlimited evaluations.</p>
<a href="https://www.echoscore.dev" target="_blank">Learn more about ECHOscore</a>
</div>
""")
    # Show or hide each explanation box whenever its slider changes.
    s1.change(toggle_explain, s1, e1)
    s2.change(toggle_explain, s2, e2)
    s3.change(toggle_explain, s3, e3)
    s4.change(toggle_explain, s4, e4)
    s5.change(toggle_explain, s5, e5)
    # Results area (hidden initially)
    with gr.Row(visible=False) as results_area:
        subj_tbl = gr.Dataframe(label="Prompt Subjective Scores")
        oval_tbl = gr.Dataframe(label="OVAL Automated Scores")
        deep_tbl = gr.Dataframe(label="DeepEval Automated Scores")
        radar = gr.Plot(label="Final Radar Chart")
        csv_out = gr.File(label="Export CSV")
        notes_out = gr.Textbox(label="Notes (Slang/Tech Terms)")
        exp1_out = gr.Textbox(label="Clarity Explanation")
        exp2_out = gr.Textbox(label="Scope Definition Explanation")
        exp3_out = gr.Textbox(label="Intent Alignment Explanation")
        exp4_out = gr.Textbox(label="Bias/Induction Explanation")
        exp5_out = gr.Textbox(label="Efficiency Explanation")
        auto_out = gr.Textbox(label="Automatic Explanation")
        judge_llm_out = gr.Textbox(label="LLM-as-a-Judge")
        ja1_out = gr.Number(label="Sensory Accuracy",visible=False)
        ja2_out = gr.Number(label="Emotional Engagement",visible=False)
        ja3_out = gr.Number(label="Flow & Naturalness",visible=False)
        ja4_out = gr.Number(label="Imagery Completeness",visible=False)
        ja5_out = gr.Number(label="Simplicity & Accessibility",visible=False)
        judge_remarks_out = gr.Textbox(label="Judge Remarks")
    submit = gr.Button("Submit", elem_id="submit-btn")
    # Visible usage counter shown at the bottom of the page
    footer_count = gr.Textbox(label="Today's Usage", value="Today Counts: 0/150", interactive=False, visible=True)
    gr.Markdown("""
<div>
⚠️ This is a **demo version** of ECHOscore.
Data contribution, uploads, and edits are **not supported**.
To try the full version, please download the desktop release.
</div>
""")
    # Initial check on page load: apply the quota state to the UI
    iface.load(
        check_daily_limit_state,
        None,
        [limit_notice, submit, results_area, daily_count, footer_count] # includes footer_count
    )
    # Second load handler: generate the first captcha and store its answer
    iface.load(
        lambda: generate_captcha(),
        None,
        [captcha_text, correct_answer]
    )
    # Submit: the inputs follow evaluate()'s parameter order; evaluate() must
    # return one value per entry of the outputs list, in this order.
    submit.click(
        evaluate,
        [
            prompt_in, output_in,
            s1, s2, s3, s4, s5,
            e1, e2, e3, e4, e5,
            judge_llm, ja1, ja2, ja3, ja4, ja5,
            judge_remark, remark,
            captcha_answer, correct_answer,
            session_state
        ],
        [
            limit_notice, results_area,
            subj_tbl, oval_tbl, deep_tbl,
            radar, csv_out, notes_out,
            exp1_out, exp2_out, exp3_out, exp4_out, exp5_out,
            auto_out,
            judge_llm_out, ja1_out, ja2_out, ja3_out, ja4_out, ja5_out,
            judge_remarks_out,
            captcha_text, correct_answer,
            daily_count,
            judge_section_state, # updates the judge section state
            footer_count # updates the footer counter
        ]
    )
    # Show a notice when the CSV download control is used
    csv_out.download(show_personal_version_notice)
    # Toggle the judge section: flip the state first, then apply it to the UI
    toggle_judge_btn.click(
        lambda x: ("expanded" if x == "collapsed" else "collapsed"),
        judge_section_state,
        judge_section_state
    ).then(
        toggle_judge_section,
        judge_section_state,
        [judge_section, toggle_judge_btn]
    )
# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()