Spaces:

kland
/

llm_eval3

Sleeping

App Files Files Community

llm_eval3 / templates /report.html

kland

Upload 4 files

6c886f2 verified 14 days ago

raw

history blame contribute delete

12 kB

	<!DOCTYPE html>
	<html lang="ko">
	<head>
	<meta charset="UTF-8">
	<title>㈜강원랜드 생성형 AI 모델 실증 평가 보고서</title>
	<style>
	/* Print-only rules: keep background colours, hide on-screen controls, and let .page-break start a new sheet. */
	@media print {
	/* print-color-adjust is the standard property; the -webkit- form is kept for engines that only know the prefix. */
	body { -webkit-print-color-adjust: exact; print-color-adjust: exact; }
	.no-print { display: none; }
	.page-break { page-break-before: always; }
	}
	/* Page frame */
	body { font-family: 'Malgun Gothic', sans-serif; margin: 0; padding: 0; background-color: #fff; }
	.container { max-width: 800px; margin: 40px auto; padding: 40px; border: 1px solid #ccc; box-shadow: 0 0 10px rgba(0,0,0,0.1); }
	/* Headings: h1 overrides the shared underline with a gradient banner; h2 adds a top rule. */
	h1, h2 { text-align: center; border-bottom: 2px solid #000; padding-bottom: 10px; margin-bottom: 40px; }
	h1 { background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); color: white; padding: 20px; border: none; }
	h2 { border-top: 2px solid #ccc; margin-top: 40px; }
	/* Score table */
	table { width: 100%; border-collapse: collapse; margin-bottom: 30px; }
	th, td { border: 1px solid #999; padding: 10px; text-align: left; word-break: break-all; }
	th { background-color: #f2f2f2; font-weight: bold; text-align: center; }
	.label { font-weight: bold; color: #333; }
	.score { font-size: 1.1em; color: #0056b3; font-weight: bold; }
	.final-score { font-size: 1.2em; color: #d9534f; font-weight: bold; }
	/* Confirmation footer */
	.footer { margin-top: 80px; text-align: center; }
	/* .signature-box is floated, so clear it here; otherwise the footer (and the bordered container) end above the signature line. */
	.footer::after { content: ""; display: block; clear: both; }
	.signature-box { margin-top: 60px; float: right; }
	/* Print button (screen only, see .no-print) */
	.button-container { text-align: center; margin-top: 40px; }
	button { background-color: #007bff; color: white; border: none; padding: 10px 20px; font-size: 1em; cursor: pointer; border-radius: 5px; }
	.total-score-row th, .total-score-row td { background-color: #e9ecef; font-weight: bold; font-size: 1.1em; }
	/* Detail pages */
	.details-section h3 { color: #34495e; border-bottom: 1px solid #ccc; padding-bottom: 5px; }
	.details-section pre { background-color: #f8f9fa; padding: 15px; border-radius: 5px; white-space: pre-wrap; word-wrap: break-word; font-size: 0.9em; }
	</style>
	</head>
	<body>

	<div class="container">
	<h1>㈜강원랜드 생성형 AI 모델 실증 평가 보고서</h1>

	{#
	Summary page: one table row per metric and a grand total out of 20.

	Each metric's score is resolved exactly once below and reused by both its
	row and the total, so the two can never disagree.
	- Quantitative metrics (perplexity, rouge, bleu) count only when the result
	exists and carries no truthy "error" attribute; otherwise they score 0.
	- Qualitative metrics (mmlu, truthfulqa, drop, mbpp_humaneval) count when
	the result exists.
	- A missing, None or zero final_score falls back to 0 via default(0, true),
	so the "%.2f" format never receives an undefined value.
	The "result exists" guard matters: reading an attribute of a metric that is
	absent from all_results raises an undefined error instead of yielding 0.
	#}
	{% set ppl_ok = all_results.perplexity and not all_results.perplexity.error %}
	{% set rouge_ok = all_results.rouge and not all_results.rouge.error %}
	{% set bleu_ok = all_results.bleu and not all_results.bleu.error %}
	{% set ppl_score = all_results.perplexity.final_score|default(0, true) if ppl_ok else 0 %}
	{% set rouge_score = all_results.rouge.final_score|default(0, true) if rouge_ok else 0 %}
	{% set bleu_score = all_results.bleu.final_score|default(0, true) if bleu_ok else 0 %}
	{% set mmlu_score = all_results.mmlu.final_score|default(0, true) if all_results.mmlu else 0 %}
	{% set truthfulqa_score = all_results.truthfulqa.final_score|default(0, true) if all_results.truthfulqa else 0 %}
	{% set drop_score = all_results.drop.final_score|default(0, true) if all_results.drop else 0 %}
	{% set mbpp_humaneval_score = all_results.mbpp_humaneval.final_score|default(0, true) if all_results.mbpp_humaneval else 0 %}
	{% set total_score = ppl_score + rouge_score + bleu_score + mmlu_score + truthfulqa_score + drop_score + mbpp_humaneval_score %}

	<table>
	<tr>
	<th style="width: 30%;">평가 대상 LLM 모델 주소</th>
	<td colspan="3">{{ all_results.target_url if all_results.target_url else '입력되지 않음' }}</td>
	</tr>
	<tr>
	<th>평가 영역</th>
	<th>평가 항목</th>
	<th>결과값</th>
	<th>최종 점수</th>
	</tr>

	{# Quantitative metrics: 3 + 3 + 2 = 8 points. #}
	<tr>
	<th rowspan="3" style="vertical-align: middle;">정량 평가 (8점)</th>
	<td>1. Perplexity</td>
	<td style="text-align: center;"><span class="score">{{ all_results.perplexity.score_display if ppl_ok else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(ppl_score) }} / 3</span></td>
	</tr>
	<tr>
	<td>2. ROUGE</td>
	<td style="text-align: center;"><span class="score">{{ all_results.rouge.score_display if rouge_ok else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(rouge_score) }} / 3</span></td>
	</tr>
	<tr>
	<td>3. BLEU</td>
	<td style="text-align: center;"><span class="score">{{ all_results.bleu.score_display if bleu_ok else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(bleu_score) }} / 2</span></td>
	</tr>

	{# Qualitative metrics: 4 x 3 = 12 points; the result column shows the grade. #}
	<tr>
	<th rowspan="4" style="vertical-align: middle;">정성 평가 (12점)</th>
	<td>4. MMLU</td>
	<td style="text-align: center;"><span class="score">{{ all_results.mmlu.grade if all_results.mmlu and all_results.mmlu.grade else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(mmlu_score) }} / 3</span></td>
	</tr>
	<tr>
	<td>5. TruthfulQA</td>
	<td style="text-align: center;"><span class="score">{{ all_results.truthfulqa.grade if all_results.truthfulqa and all_results.truthfulqa.grade else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(truthfulqa_score) }} / 3</span></td>
	</tr>
	<tr>
	<td>6. DROP</td>
	<td style="text-align: center;"><span class="score">{{ all_results.drop.grade if all_results.drop and all_results.drop.grade else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(drop_score) }} / 3</span></td>
	</tr>
	<tr>
	<td>7. MBPP & HumanEval</td>
	<td style="text-align: center;"><span class="score">{{ all_results.mbpp_humaneval.grade if all_results.mbpp_humaneval and all_results.mbpp_humaneval.grade else '-' }}</span></td>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(mbpp_humaneval_score) }} / 3</span></td>
	</tr>

	<tr class="total-score-row">
	<th colspan="3">총점</th>
	<td style="text-align: center;"><span class="final-score">{{ "%.2f"|format(total_score) }} / 20</span></td>
	</tr>

	</table>

	{# Confirmation footer; the empty #report-date span is filled in client-side by the script at the end of this template. #}
	<div class="footer">
	<p>상기 내용과 같이 생성형 AI 모델 실증 평가를 완료하였음을 확인합니다.</p>
	<p class="eval-date"><b>평가일자:</b> <span id="report-date"></span></p>
	<div class="signature-box">
	<p><b>평가자:</b> ____________________ (서명)</p>
	</div>
	</div>
	</div>

	<!-- Empty spacer: the .page-break print rule starts a new sheet here, so the detail section prints on its own page. -->
	<div class="page-break"></div>

	<div class="container details-section">
	{#
	Detail pages: for every metric, echo the texts that were evaluated, taken
	from input_texts. A missing or empty text is replaced by the placeholder
	below. Perplexity and ROUGE also list their detail values when the result
	carries a "details" entry.
	Lines inside the pre elements are rendered verbatim, so their layout must
	not be re-indented.
	#}
	{% set not_evaluated = '(평가되지 않음)' %}
	<h2>상세 평가 내용</h2>

	{# 1. Perplexity: source text plus the measured detail values. #}
	<div class="detail-item">
	<h3>1. Perplexity (언어 모델 품질)</h3>
	<h4>입력 텍스트:</h4>
	<pre>{{ input_texts.ppl_text or not_evaluated }}</pre>
	{% if all_results.perplexity and all_results.perplexity.details %}
	{% set ppl_details = all_results.perplexity.details %}
	<h4>세부 측정값:</h4>
	<pre>기본 PPL: {{ ppl_details.base_ppl }}
	페널티 계수: {{ ppl_details.penalty_factor }}
	토큰 수: {{ ppl_details.token_count }}
	계산 시간: {{ ppl_details.calc_time }}</pre>
	{% endif %}
	</div>

	{# 2. ROUGE: generated vs. reference summary; empty ROUGE-1/2/L values print as a dash. #}
	<div class="detail-item">
	<h3>2. ROUGE (요약 능력)</h3>
	<h4>모델 생성 요약문:</h4>
	<pre>{{ input_texts.rouge_generated or not_evaluated }}</pre>
	<h4>참조 요약문:</h4>
	<pre>{{ input_texts.rouge_reference or not_evaluated }}</pre>
	{% if all_results.rouge and all_results.rouge.details %}
	{% set rouge_details = all_results.rouge.details %}
	<h4>세부 측정값:</h4>
	<pre>ROUGE-1: {{ rouge_details.rouge1 or '-' }}
	ROUGE-2: {{ rouge_details.rouge2 or '-' }}
	ROUGE-L: {{ rouge_details.rougeL or '-' }}
	가중 평균: {{ rouge_details.weighted_avg }}
	길이 페널티: {{ rouge_details.length_penalty }}</pre>
	{% endif %}
	</div>

	{# 3. BLEU: generated translation and its two references. #}
	<div class="detail-item">
	<h3>3. BLEU (번역 정확성)</h3>
	<h4>모델 생성 번역문:</h4>
	<pre>{{ input_texts.bleu_generated or not_evaluated }}</pre>
	<h4>참조 번역문 1:</h4>
	<pre>{{ input_texts.bleu_reference1 or not_evaluated }}</pre>
	<h4>참조 번역문 2:</h4>
	<pre>{{ input_texts.bleu_reference2 or not_evaluated }}</pre>
	</div>

	{# 4-7. Qualitative metrics: generated output next to the reference answer. #}
	<div class="detail-item">
	<h3>4. MMLU (복합 문제 해결)</h3>
	<h4>모델 생성 결과:</h4>
	<pre>{{ input_texts.mmlu_generated or not_evaluated }}</pre>
	<h4>참조(정답):</h4>
	<pre>{{ input_texts.mmlu_reference or not_evaluated }}</pre>
	</div>

	<div class="detail-item">
	<h3>5. TruthfulQA (사실 기반 답변)</h3>
	<h4>모델 생성 결과:</h4>
	<pre>{{ input_texts.truthfulqa_generated or not_evaluated }}</pre>
	<h4>참조(정답):</h4>
	<pre>{{ input_texts.truthfulqa_reference or not_evaluated }}</pre>
	</div>

	<div class="detail-item">
	<h3>6. DROP (문서 독해/추론)</h3>
	<h4>모델 생성 결과:</h4>
	<pre>{{ input_texts.drop_generated or not_evaluated }}</pre>
	<h4>참조(정답):</h4>
	<pre>{{ input_texts.drop_reference or not_evaluated }}</pre>
	</div>

	<div class="detail-item">
	<h3>7. MBPP & HumanEval (코드 생성/업무 자동화)</h3>
	<h4>모델 생성 결과:</h4>
	<pre>{{ input_texts.mbpp_humaneval_generated or not_evaluated }}</pre>
	<h4>참조(정답):</h4>
	<pre>{{ input_texts.mbpp_humaneval_reference or not_evaluated }}</pre>
	</div>
	</div>


	<!-- Print control: clicking opens the browser's print dialog; the .no-print class hides this block on the printed page. -->
	<div class="button-container no-print">
	<button onclick="window.print()">보고서 인쇄하기</button>
	</div>

	<script>
	document.addEventListener('DOMContentLoaded', function() {
	const now = new Date();
	const year = now.getFullYear();
	const month = String(now.getMonth() + 1).padStart(2, '0');
	const day = String(now.getDate()).padStart(2, '0');
	const hours = String(now.getHours()).padStart(2, '0');
	const minutes = String(now.getMinutes()).padStart(2, '0');
	const seconds = String(now.getSeconds()).padStart(2, '0');
	const formattedDate = `${year}년 ${month}월 ${day}일 ${hours}:${minutes}:${seconds}`;
	document.getElementById('report-date').textContent = formattedDate;
	});
	</script>

	</body>
	</html>