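"""ECHOscore – a Gradio app for side-by-side prompt vs output evaluation.

Three score sources share one 15-dimension axis (config.DIMS, assumed here to
be ordered: subjective prompt dimensions at indices 0-4, OVAL at 5-9,
DeepEval at 10-14):
  * 5 subjective prompt scores entered via sliders,
  * 5 automated OVAL scores over the model output,
  * 5 automated DeepEval scores over the prompt/output pair,
plus an optional LLM-as-a-judge block. Results are rendered as three score
tables, a radar chart of per-dimension maxima, and a downloadable CSV.
"""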
import gradio as gr
import pandas as pd
import plotly.graph_objects as go
import tempfile
from config import DIMS
from OVAL import oval_scores
from DeepEval import deepeval_scores
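
# OVAL and DeepEval are external scorer modules. Their inferred contract (an
# assumption read off the calls in evaluate() below, not a documented API):
# each returns a 15-element list aligned with DIMS, with None at dimensions
# it does not score. Hypothetical stand-ins for offline testing might be:
#
#     def oval_scores(output_text):
#         return [None] * 5 + [4.0, 3.5, 2.5, 3.0, 4.5] + [None] * 5
#
#     def deepeval_scores(prompt_text, output_text):
#         return [None] * 10 + [4.0, 3.0, 2.8, 3.5, 4.2]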

def make_explanation(system: str, dimension: str, score: float) -> str:
    """Return a templated low-score explanation for a scorer/dimension pair."""
    templates = {
        # OVAL's 5 extended dimensions
        "Structural Clarity":  f"{system} scored Structural Clarity at {score}: The text structure may be unclear; consider adding headings or breaking into paragraphs.",
        "Reasoning Quality":   f"{system} scored Reasoning Quality at {score}: Argument support is weak; consider adding logical reasoning or evidence.",
        "Factuality":          f"{system} scored Factuality at {score}: Information may be inaccurate; please fact-check the facts.",
        "Depth of Analysis":   f"{system} scored Depth of Analysis at {score}: Analysis seems shallow; add more insights or examples.",
        "Topic Coverage":      f"{system} scored Topic Coverage at {score}: Key aspects may be missing; ensure you cover the full scope.",
        # DeepEval's 5 extended dimensions
        "Fluency":             f"{system} scored Fluency at {score}: Expression may be disfluent; consider smoothing sentence transitions.",
        "Prompt Relevance":    f"{system} scored Prompt Relevance at {score}: The response may stray from the prompt; ensure alignment.",
        "Conciseness":         f"{system} scored Conciseness at {score}: The response may be verbose; consider trimming redundant parts.",
        "Readability":         f"{system} scored Readability at {score}: The text is hard to read; consider simpler wording or shorter sentences.",
        "Engagement":          f"{system} scored Engagement at {score}: The response lacks engagement; add examples or a conversational tone.",
    }
    return templates.get(dimension, f"{system} scored {dimension} at {score}: Low score detected; please review this aspect.")
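
# Example (illustrative values):
#   make_explanation("OVAL", "Factuality", 2.5)
#   -> "OVAL scored Factuality at 2.5: Information may be inaccurate; please verify the stated facts."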

def evaluate(
    prompt_text: str,
    output_text: str,
    # 5 subjective prompt dimensions
    s1: float, s2: float, s3: float, s4: float, s5: float,
    # explanations for low subjective prompt scores
    e1: str, e2: str, e3: str, e4: str, e5: str,
    # LLM-as-a-judge fields
    judge_llm: str,
    ja1: float, ja2: float, ja3: float, ja4: float, ja5: float,
    judge_remark: str,
    # extra notes (slang / technical terms)
    remark: str
):
    # 1) Validation: any subjective prompt score below 3 must come with an explanation
    for score, exp, label in [
        (s1, e1, "Clarity"),
        (s2, e2, "Scope Definition"),
        (s3, e3, "Intent Alignment"),
        (s4, e4, "Bias / Induction"),
        (s5, e5, "Efficiency"),
    ]:
        if score < 3 and not exp.strip():
            raise gr.Error(f"{label} score < 3: please provide an explanation.")

    # 2) Assemble the three score vectors, each aligned to the 15 entries of DIMS
    subj = [s1, s2, s3, s4, s5] + [None]*10
    oval = oval_scores(output_text)                   # 15-element list; OVAL's dimensions sit at indices 5-9
    deep = deepeval_scores(prompt_text, output_text)  # 15-element list; DeepEval's at indices 10-14

    # 3) Auto-generate explanations for low automated scores
    auto_expls = []
    for system, scores, idxs in [
        ("OVAL",     oval, range(5,10)),
        ("DeepEval", deep, range(10,15))
    ]:
        for i in idxs:
            sc = scores[i]
            if sc is not None and sc < 3:
                auto_expls.append(make_explanation(system, DIMS[i], sc))
    auto_text = "\n".join(auto_expls) or "All automated scores ≥ 3; no issues detected."

    # 4) Build the full DataFrame and export it as CSV (including the judge columns)
    full_df = pd.DataFrame({
        "Dimension":                   DIMS,
        "Subjective (Prompt)":         subj,
        "OVAL (Output)":               oval,
        "DeepEval (Output)":           deep,
        "Judge LLM":                   [judge_llm] * len(DIMS),
        "Sensory Accuracy":            [ja1] * len(DIMS),
        "Emotional Engagement":        [ja2] * len(DIMS),
        "Flow & Naturalness":          [ja3] * len(DIMS),
        "Imagery Completeness":        [ja4] * len(DIMS),
        "Simplicity & Accessibility":  [ja5] * len(DIMS),
        "Judge Remarks":               [judge_remark] * len(DIMS),
        "Notes (Slang/Tech Terms)":    [remark] * len(DIMS),
    })
    tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
    tmp.close()  # release the handle so pandas can reopen the path on any platform
    full_df.to_csv(tmp.name, index=False)

    # 5) Extract the per-scorer sub-tables
    subj_df = full_df.iloc[0:5][["Dimension","Subjective (Prompt)"]]
    oval_df = full_df.iloc[5:10][["Dimension","OVAL (Output)"]]
    deep_df = full_df.iloc[10:15][["Dimension","DeepEval (Output)"]]

    # 6) Build the radar chart from the per-dimension maximum of the three score sources
    max_scores = [
        max([v for v in vals if v is not None]) if any(v is not None for v in vals) else 0
        for vals in zip(subj, oval, deep)
    ]
    closed_dims = DIMS + [DIMS[0]]
    r = max_scores + [max_scores[0]]
    fig = go.Figure(go.Scatterpolar(r=r, theta=closed_dims, fill='toself'))
    fig.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0,5])),
        showlegend=False,
        title="Final (Max) Scores Radar"
    )

    return (
        subj_df,
        oval_df,
        deep_df,
        fig,
        tmp.name,
        remark,
        # low-score explanations for the prompt dimensions
        e1, e2, e3, e4, e5,
        # auto-generated explanations
        auto_text,
        # judge outputs
        judge_llm,
        ja1, ja2, ja3, ja4, ja5,
        judge_remark
    )

def toggle_explain(v):
    """Show the explanation textbox only while its score is below 3."""
    return gr.update(visible=(v < 3))

css = """
#submit-btn {
  background-color: orange !important;
  color: white !important;
  border: none !important;
}
#submit-btn:hover {
  background-color: darkorange !important;
}
"""

with gr.Blocks(css=css) as iface:
    gr.Markdown("# ECHOscore – Prompt vs Output Evaluation")

    prompt_in = gr.Textbox(lines=2, label="Input (Prompt)")
    output_in = gr.Textbox(lines=4, label="Output (Model Response)")

    with gr.Row():
        s1 = gr.Slider(0,5,0,step=0.1, label="Prompt – Clarity")
        s2 = gr.Slider(0,5,0,step=0.1, label="Prompt – Scope Definition")
        s3 = gr.Slider(0,5,0,step=0.1, label="Prompt – Intent Alignment")
        s4 = gr.Slider(0,5,0,step=0.1, label="Prompt – Bias / Induction")
        s5 = gr.Slider(0,5,0,step=0.1, label="Prompt – Efficiency")

    e1 = gr.Textbox(lines=2, label="Explain Clarity (<3)", visible=False)
    e2 = gr.Textbox(lines=2, label="Explain Scope Definition (<3)", visible=False)
    e3 = gr.Textbox(lines=2, label="Explain Intent Alignment (<3)", visible=False)
    e4 = gr.Textbox(lines=2, label="Explain Bias / Induction (<3)", visible=False)
    e5 = gr.Textbox(lines=2, label="Explain Efficiency (<3)", visible=False)
    
    remark = gr.Textbox(lines=2, label="Notes on internet slang & technical terms (optional)")
    
    # LLM-as-a-judge block (entirely optional)
    judge_llm    = gr.Textbox(lines=1, label="LLM-as-a-Judge (optional: name of the judge LLM)")
    ja1          = gr.Number(label="Sensory Accuracy",            value=0, precision=1, step=0.1)
    ja2          = gr.Number(label="Emotional Engagement",        value=0, precision=1, step=0.1)
    ja3          = gr.Number(label="Flow & Naturalness",          value=0, precision=1, step=0.1)
    ja4          = gr.Number(label="Imagery Completeness",        value=0, precision=1, step=0.1)
    ja5          = gr.Number(label="Simplicity & Accessibility",  value=0, precision=1, step=0.1)
    judge_remark = gr.Textbox(lines=2, label="Judge Remarks (optional)")


    s1.change(toggle_explain, s1, e1)
    s2.change(toggle_explain, s2, e2)
    s3.change(toggle_explain, s3, e3)
    s4.change(toggle_explain, s4, e4)
    s5.change(toggle_explain, s5, e5)

    submit = gr.Button("Submit", elem_id="submit-btn")
    with gr.Row():
        subj_tbl = gr.Dataframe(label="Prompt Subjective Scores")
        oval_tbl = gr.Dataframe(label="OVAL Automated Scores")
        deep_tbl = gr.Dataframe(label="DeepEval Automated Scores")

    radar          = gr.Plot(label="Final Radar Chart")
    csv_out        = gr.File(label="Export CSV")
    notes_out      = gr.Textbox(label="Notes (Slang/Tech Terms)")
    exp1_out       = gr.Textbox(label="Clarity Explanation")
    exp2_out       = gr.Textbox(label="Scope Definition Explanation")
    exp3_out       = gr.Textbox(label="Intent Alignment Explanation")
    exp4_out       = gr.Textbox(label="Bias/Induction Explanation")
    exp5_out       = gr.Textbox(label="Efficiency Explanation")
    auto_out       = gr.Textbox(label="Automatic Explanation")
    judge_llm_out  = gr.Textbox(label="Judge LLM")
    ja1_out        = gr.Number(label="Sensory Accuracy")
    ja2_out        = gr.Number(label="Emotional Engagement")
    ja3_out        = gr.Number(label="Flow & Naturalness")
    ja4_out        = gr.Number(label="Imagery Completeness")
    ja5_out        = gr.Number(label="Simplicity & Accessibility")
    judge_remarks_out = gr.Textbox(label="Judge Remarks")

    submit.click(
        evaluate,
        [
            prompt_in, output_in,
            s1, s2, s3, s4, s5,
            e1, e2, e3, e4, e5,
            judge_llm, ja1, ja2, ja3, ja4, ja5,
            judge_remark, remark
        ],
        [
            subj_tbl, oval_tbl, deep_tbl,
            radar, csv_out, notes_out,
            exp1_out, exp2_out, exp3_out, exp4_out, exp5_out,
            auto_out,
            judge_llm_out, ja1_out, ja2_out, ja3_out, ja4_out, ja5_out,
            judge_remarks_out
        ]
    )

if __name__ == "__main__":
    iface.launch()