from flask import Flask, render_template, request, session, redirect, url_for
from flask_session import Session
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
import nltk
from rouge_score import rouge_scorer
from sacrebleu.metrics import BLEU
from datetime import datetime
import os
import math
import logging
import gc
import time
import re
print("AI ๋ชจ๋ธ๊ณผ ํ๊ฐ ์งํ๋ฅผ ๋ก๋ฉํฉ๋๋ค...") |
|
try: |
|
nltk_data_path = '/tmp/nltk_data' |
|
nltk.download('punkt', download_dir=nltk_data_path, quiet=True) |
|
nltk.data.path.append(nltk_data_path) |
|
|
|
model_name = "EleutherAI/polyglot-ko-1.3b" |
|
|
|
print(f"๋ชจ๋ธ ๋ก๋ฉ ์ค: {model_name}") |
|
tokenizer = AutoTokenizer.from_pretrained( |
|
model_name, |
|
trust_remote_code=True |
|
) |
|
if tokenizer.pad_token is None: |
|
tokenizer.pad_token = tokenizer.eos_token |
|
|
|
device = torch.device("cuda" if torch.cuda.is_available() else "cpu") |
|
model = AutoModelForCausalLM.from_pretrained( |
|
model_name, |
|
torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, |
|
low_cpu_mem_usage=True, |
|
trust_remote_code=True |
|
) |
|
model.to(device) |
|
|
|
|
|
model.eval() |
|
if torch.cuda.is_available(): |
|
model.half() |
|
|
|
scorer = rouge_scorer.RougeScorer(['rouge1', 'rouge2', 'rougeL'], use_stemmer=True) |
|
bleu = BLEU() |
|
|
|
print("AI ๋ชจ๋ธ ๋ก๋ฉ ๋ฐ ์ต์ ํ ์๋ฃ.") |
|
model_loaded = True |
|
|
|
if torch.cuda.is_available(): |
|
print(f"GPU ๋ฉ๋ชจ๋ฆฌ ์ฌ์ฉ๋: {torch.cuda.memory_allocated() / 1024**3:.2f} GB") |
|
|
|
except Exception as e: |
|
print(f"๋ชจ๋ธ ๋ก๋ฉ ์ค ์ฌ๊ฐํ ์ค๋ฅ ๋ฐ์: {e}") |
|
model_loaded = False |
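
# Rough sizing note (back-of-the-envelope, not measured): polyglot-ko-1.3b has
# about 1.3e9 parameters, so the weights alone occupy roughly 2.6 GB in float16
# on GPU and about 5.2 GB in float32 on CPU, before activations and framework
# overhead.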

app = Flask(__name__)

app.config["SESSION_PERMANENT"] = False
app.config["SESSION_TYPE"] = "filesystem"
# Without SECRET_KEY in the environment, a fresh random key is generated on
# every restart, which invalidates existing sessions.
app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', os.urandom(24))
Session(app)

log_handler = logging.FileHandler('report_log.txt', encoding='utf-8')
log_handler.setLevel(logging.INFO)
log_formatter = logging.Formatter('%(asctime)s - %(message)s', '%Y-%m-%d %H:%M:%S')
log_handler.setFormatter(log_formatter)
app.logger.addHandler(log_handler)
app.logger.setLevel(logging.INFO)


def is_structured_text(text):
    """Heuristically decide whether a document is structured (numbered/outlined)."""
    lines = text.split('\n')

    # Common numbering styles: "1.", "(1)", Korean syllable markers ("가."),
    # circled digits, lowercase Roman numerals, "a)", and bullet markers.
    numbering_patterns = [
        r'^\s*\d+[\.\)]\s+',
        r'^\s*\(\d+\)\s+',
        r'^\s*[가-힣][\.\)]\s+',
        r'^\s*[①-⑳]\s+',
        r'^\s*[ⅰ-ⅹ][\.\)]\s+',
        r'^\s*[a-zA-Z][\.\)]\s+',
        r'^\s*[-•‣▪]\s+',
    ]

    # Section headers: markdown "#", Korean/Chinese "제N장/절/관/조/항" markers,
    # "1. Title", and uppercase Roman numeral headings.
    header_patterns = [
        r'^#{1,6}\s+',
        r'^[제第]\s*\d+\s*[장절관조항]',
        r'^\d+\.\s+\w+',
        r'^[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ][\.\s]',
    ]

    total_lines = len([l for l in lines if l.strip()])
    if total_lines == 0:
        return False

    numbered_lines = 0
    header_lines = 0

    for line in lines:
        if not line.strip():
            continue

        for pattern in numbering_patterns:
            if re.match(pattern, line):
                numbered_lines += 1
                break

        for pattern in header_patterns:
            if re.match(pattern, line):
                header_lines += 1
                break

    # A document counts as structured when more than 20% of its non-empty
    # lines carry numbering or header markers.
    structure_ratio = (numbered_lines + header_lines) / total_lines
    return structure_ratio > 0.2
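
# Illustrative example (hypothetical input): in a five-line document where two
# lines begin with "1. " and "2. ", structure_ratio = 2/5 = 0.4 > 0.2, so the
# text is treated as structured and the repetition thresholds below are relaxed.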


def validate_ppl_text(text):
    """Reject inputs that are too short or too repetitive for a meaningful PPL."""
    text_len = len(text)
    if text_len < 2000:
        return {"valid": False, "message": f"Text is too short: currently {text_len} characters; please enter at least 2,000."}

    is_structured = is_structured_text(text)

    # Count characters that appear in runs longer than 10 (whitespace excluded).
    char_repetitions = 0
    max_consecutive = 0
    current_consecutive = 1

    for i in range(1, len(text)):
        if text[i] == text[i-1] and text[i] not in ' \n\t':
            current_consecutive += 1
            max_consecutive = max(max_consecutive, current_consecutive)
        else:
            if current_consecutive > 10:
                char_repetitions += current_consecutive
            current_consecutive = 1
    # Flush a trailing run; without this, a repetition at the very end of the
    # text was never counted.
    if current_consecutive > 10:
        char_repetitions += current_consecutive

    char_repetition_ratio = char_repetitions / text_len

    # Structured documents legitimately repeat markers, so allow a higher ratio.
    repetition_threshold = 0.5 if is_structured else 0.3
    if char_repetition_ratio > repetition_threshold:
        return {"valid": False, "message": f"Too many repeated characters ({char_repetition_ratio*100:.1f}%). Please enter normal text."}

    words = text.split()
    if len(words) > 0:
        if not is_structured:
            bigrams = [' '.join(words[i:i+2]) for i in range(len(words) - 1)]
            trigrams = [' '.join(words[i:i+3]) for i in range(len(words) - 2)]

            bigram_unique_ratio = len(set(bigrams)) / len(bigrams) if bigrams else 1
            trigram_unique_ratio = len(set(trigrams)) / len(trigrams) if trigrams else 1

            if bigram_unique_ratio < 0.5 or trigram_unique_ratio < 0.6:
                return {"valid": False, "message": "Too many repeated word patterns. Please enter more varied text."}

    # Token-level n-gram diversity check (requires the tokenizer to be loaded).
    tokens = tokenizer.convert_ids_to_tokens(tokenizer(text, max_length=1024, truncation=True).input_ids)

    for n in range(2, 7):
        if len(tokens) >= n:
            ngrams = [tuple(tokens[i:i+n]) for i in range(len(tokens) - n + 1)]
            if ngrams:
                unique_ratio = len(set(ngrams)) / len(ngrams)

                if is_structured:
                    threshold = 0.2 + (n - 2) * 0.1
                else:
                    threshold = 0.3 + (n - 2) * 0.1

                if unique_ratio < threshold:
                    return {"valid": False, "message": f"Too many repeated {n}-gram patterns. Please enter more varied text."}

    word_count = len(words)
    structure_msg = " (detected as a structured document)" if is_structured else ""
    return {"valid": True, "message": f"Validation complete: {text_len} characters, {word_count} words{structure_msg}"}
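
# Worked example of the n-gram gate above (hypothetical numbers): for n = 4 on
# unstructured text the threshold is 0.3 + (4 - 2) * 0.1 = 0.5, so the input is
# rejected if fewer than half of its 4-token windows are unique.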


def calculate_perplexity_logic(text, max_tokens=512, use_sliding_window=False):
    encodings = tokenizer(text, return_tensors="pt", max_length=max_tokens, truncation=True)
    input_ids = encodings.input_ids[0].to(device)

    if len(input_ids) < 10:
        raise ValueError("Too few tokens (minimum 10)")

    tokens = tokenizer.convert_ids_to_tokens(input_ids)

    is_structured = is_structured_text(text)
    is_high_quality = detect_high_quality_text(text)

    repetition_penalties = {}

    # Character-level repetition over the first 1,000 characters.
    char_repetitions = 0
    for i in range(1, min(len(text), 1000)):
        if text[i] == text[i-1] and text[i] not in ' \n\t.,!?;:':
            char_repetitions += 1
    char_penalty = min(char_repetitions / 1000, 0.5)

    # Token n-gram repetition penalties for n = 3..5.
    for n in range(3, 6):
        if len(tokens) >= n:
            ngrams = [tuple(tokens[i:i+n]) for i in range(len(tokens) - n + 1)]
            if ngrams:
                unique_ratio = len(set(ngrams)) / len(ngrams)
                repetition_penalties[f'{n}gram'] = max(0, (1 - unique_ratio) * 0.5)

    unique_tokens = len(set(tokens))
    total_tokens = len(tokens)
    vocabulary_diversity = unique_tokens / total_tokens

    # Korean text tokenizes into more repeated subwords, so expect lower
    # diversity when the sample contains Hangul (code points above U+3130).
    expected_diversity = 0.3 if any(ord(c) > 0x3130 for c in text[:100]) else 0.5
    diversity_penalty = max(0, expected_diversity - vocabulary_diversity)

    # Well-formed or structured text gets a smaller penalty multiplier.
    quality_bonus = 1.0
    if is_high_quality:
        quality_bonus = 0.5
    elif is_structured:
        quality_bonus = 0.7

    avg_repetition = sum(repetition_penalties.values()) / max(len(repetition_penalties), 1)

    # Heavily repetitive text is punished with a steeper exponent; otherwise a
    # mild factor scaled by the quality bonus.
    if char_penalty > 0.3:
        total_penalty = (avg_repetition * 2 + char_penalty * 3 + diversity_penalty)
        penalty_factor = math.exp(total_penalty * 3.0)
    else:
        total_penalty = (avg_repetition * 0.3 + char_penalty * 0.5 + diversity_penalty * 0.5) * quality_bonus
        penalty_factor = math.exp(total_penalty * 1.5)

    seq_len = input_ids.size(0)

    with torch.no_grad():
        if not use_sliding_window or seq_len <= 256:
            outputs = model(input_ids.unsqueeze(0), labels=input_ids.unsqueeze(0))
            ppl = torch.exp(outputs.loss).item()
        else:
            max_length = 256
            stride = 128
            nlls = []
            for begin_loc in range(0, seq_len, stride):
                end_loc = min(begin_loc + max_length, seq_len)
                input_chunk = input_ids[begin_loc:end_loc].unsqueeze(0)
                try:
                    outputs = model(input_chunk, labels=input_chunk)
                    if outputs.loss is not None and torch.isfinite(outputs.loss):
                        nlls.append(outputs.loss)
                except Exception as chunk_error:
                    print(f"Error while processing a chunk: {chunk_error}")
                    continue
            if not nlls:
                raise RuntimeError("Could not compute a valid loss value")
            ppl = torch.exp(torch.mean(torch.stack(nlls))).item()

    # Discount the raw PPL for text the heuristics consider well-formed.
    if is_high_quality and ppl > 20:
        ppl = ppl * 0.6
    elif is_structured and ppl > 30:
        ppl = ppl * 0.8

    adjusted_ppl = ppl * penalty_factor

    # Cap runaway scores when character repetition is low (likely an artifact).
    if adjusted_ppl > 200 and char_penalty < 0.2:
        adjusted_ppl = min(adjusted_ppl, 150)

    return {
        'base_ppl': ppl,
        'adjusted_ppl': adjusted_ppl,
        'penalty_factor': penalty_factor,
        'token_count': len(input_ids),
        'vocabulary_diversity': vocabulary_diversity,
        'char_repetition': char_penalty,
        'is_structured': is_structured,
        'is_high_quality': is_high_quality
    }
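
# Background on the base score: for a causal LM, outputs.loss is the mean
# cross-entropy of next-token prediction, so ppl = exp(loss), i.e.
#     PPL = exp(-(1/N) * sum_i log p(x_i | x_<i)).
# The sliding-window branch averages per-chunk losses, which only approximates
# full-context PPL: tokens in overlapping strides are scored more than once,
# each time with a truncated left context.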


def detect_high_quality_text(text):
    """Detect well-formed text (e.g., GPT-generated prose)."""
    indicators = 0

    # 1. Multiple paragraphs.
    paragraphs = text.split('\n\n')
    if len(paragraphs) >= 3:
        indicators += 1

    # 2. Mostly complete sentences.
    sentences = re.split(r'[.!?]\s', text)
    if len(sentences) > 5:
        complete_sentences = sum(1 for s in sentences if len(s.strip()) > 10)
        if complete_sentences / len(sentences) > 0.8:
            indicators += 1

    # 3. Korean discourse connectives ("first", "second", "third", "therefore",
    # "however", "also", "for example", "in conclusion", ...).
    transition_words = ['첫째', '둘째', '셋째', '따라서', '그러나', '또한', '예를 들어',
                        '결론적으로', '무엇보다', '나아가', '더불어', '특히', '이에 따라']
    transition_count = sum(1 for word in transition_words if word in text)
    if transition_count >= 3:
        indicators += 1

    # 4. Density of professional vocabulary ("framework", "build-out", "basis",
    # "utilization", "analysis", "model", "system", "process", "strategy", ...).
    professional_terms = ['체계', '구축', '기반', '활용', '분석', '모델', '시스템',
                          '프로세스', '전략', '방안', '효과', '개선', '고도화']
    prof_count = sum(text.count(term) for term in professional_terms)
    if prof_count > len(text.split()) * 0.02:
        indicators += 1

    # 5. Fairly uniform paragraph lengths.
    if len(paragraphs) > 2:
        lengths = [len(p) for p in paragraphs if p.strip()]
        if lengths:
            avg_length = sum(lengths) / len(lengths)
            variance = sum((l - avg_length) ** 2 for l in lengths) / len(lengths)
            if variance < (avg_length * 0.5) ** 2:
                indicators += 1

    return indicators >= 3
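
# Illustrative outcome: a text with three paragraphs of similar length that
# uses at least three of the listed connectives scores 3 of the 5 indicators
# and is treated as high quality, which halves the penalty multiplier
# (quality_bonus drops from 1.0 to 0.5 in calculate_perplexity_logic).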


def get_ppl_calculation_mode(text_length):
    # Longer inputs get truncated more aggressively to keep latency bounded.
    if text_length > 2000:
        return "ultra_fast"
    elif text_length > 1000:
        return "fast"
    else:
        return "accurate"


def get_ppl_score(adjusted_ppl):
    # Map adjusted PPL to a 0.6-3.0 band (lower PPL is better).
    if adjusted_ppl < 15: return 3.0
    elif adjusted_ppl < 30: return 2.4
    elif adjusted_ppl < 50: return 1.8
    elif adjusted_ppl < 100: return 1.2
    else: return 0.6


def get_rouge_score(final_rouge_score):
    # Map the weighted ROUGE average to a 0.6-3.0 band (higher is better).
    if final_rouge_score >= 0.60: return 3.0
    elif final_rouge_score >= 0.50: return 2.4
    elif final_rouge_score >= 0.40: return 1.8
    elif final_rouge_score >= 0.30: return 1.2
    else: return 0.6


def get_bleu_score(bleu_score):
    # Map BLEU (0-1 scale) to a 0.4-2.0 band (higher is better).
    if bleu_score >= 0.50: return 2.0
    elif bleu_score >= 0.40: return 1.6
    elif bleu_score >= 0.30: return 1.2
    elif bleu_score >= 0.20: return 0.8
    else: return 0.4
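
# Quick sanity checks for the banding helpers (hypothetical values):
#   get_ppl_score(12.0)   -> 3.0  (adjusted PPL below 15)
#   get_rouge_score(0.45) -> 1.8  (weighted ROUGE in [0.40, 0.50))
#   get_bleu_score(0.22)  -> 0.8  (BLEU in [0.20, 0.30))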


def cleanup_memory():
    # Release cached CUDA blocks and force a GC pass between heavy requests.
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
    gc.collect()


@app.route('/', methods=['GET'])
def index():
    all_results = session.get('all_results', {})
    input_texts = session.get('input_texts', {})
    return render_template('index.html', model_loaded=model_loaded, all_results=all_results, input_texts=input_texts)


@app.route('/evaluate', methods=['POST'])
def evaluate_text():
    if 'all_results' not in session: session['all_results'] = {}
    if 'input_texts' not in session: session['input_texts'] = {}

    target_url = request.form.get('target_url')
    if target_url: session['all_results']['target_url'] = target_url

    metric = request.form.get('metric')
    results_to_store = {'metric': metric}

    try:
        if metric == 'perplexity':
            text = request.form.get('ppl_text', '').strip()
            session['input_texts']['ppl_text'] = text

            # Check model availability first: validate_ppl_text needs the
            # tokenizer, which does not exist when loading failed.
            if not model_loaded:
                results_to_store['error'] = "The model is not loaded."
            else:
                validation_result = validate_ppl_text(text)
                if not validation_result["valid"]:
                    results_to_store['error'] = validation_result["message"]
                else:
                    try:
                        cleanup_memory()

                        calc_mode = get_ppl_calculation_mode(len(text))
                        start_time = time.time()

                        if calc_mode == "ultra_fast":
                            ppl_result = calculate_perplexity_logic(text, max_tokens=256, use_sliding_window=False)
                        elif calc_mode == "fast":
                            ppl_result = calculate_perplexity_logic(text, max_tokens=384, use_sliding_window=False)
                        else:
                            ppl_result = calculate_perplexity_logic(text, max_tokens=512, use_sliding_window=True)

                        calc_time = time.time() - start_time
                        adjusted_ppl = ppl_result['adjusted_ppl']

                        results_to_store['score_value'] = adjusted_ppl
                        results_to_store['score_display'] = f"{adjusted_ppl:.4f}"
                        results_to_store['details'] = {
                            'base_ppl': f"{ppl_result['base_ppl']:.4f}",
                            'penalty_factor': f"{ppl_result['penalty_factor']:.4f}",
                            'token_count': ppl_result['token_count'],
                            'calc_time': f"{calc_time:.2f}s",
                            'calc_mode': calc_mode,
                            'is_structured': ppl_result.get('is_structured', False)
                        }
                        results_to_store['final_score'] = get_ppl_score(adjusted_ppl)

                        cleanup_memory()

                    except Exception as ppl_error:
                        results_to_store['error'] = f"Error while computing PPL: {ppl_error}"

            session['all_results']['perplexity'] = results_to_store

        elif metric == 'rouge':
            gen_text = request.form.get('rouge_generated', '').strip()
            ref_text = request.form.get('rouge_reference', '').strip()
            session['input_texts']['rouge_generated'] = gen_text
            session['input_texts']['rouge_reference'] = ref_text

            if not gen_text or not ref_text:
                results_to_store['error'] = "Please enter both the generated summary and the reference summary."
            else:
                scores = scorer.score(ref_text, gen_text)
                r1, r2, rL = scores['rouge1'].fmeasure, scores['rouge2'].fmeasure, scores['rougeL'].fmeasure

                # ROUGE-L is weighted slightly higher than ROUGE-1/2.
                weighted_avg = (r1 * 0.3 + r2 * 0.3 + rL * 0.4)

                # Penalize summaries much shorter or longer than the reference.
                len_gen = len(gen_text.split()); len_ref = len(ref_text.split())
                length_ratio = len_gen / len_ref if len_ref > 0 else 0
                if 0.8 <= length_ratio <= 1.2: length_penalty = 1.0
                elif length_ratio < 0.5 or length_ratio > 2.0: length_penalty = 0.8
                else: length_penalty = 0.9
                final_rouge_score = weighted_avg * length_penalty

                results_to_store['score_value'] = final_rouge_score
                results_to_store['score_display'] = f"{final_rouge_score:.4f}"
                results_to_store['details'] = {
                    'rouge1': f"{r1:.4f}",
                    'rouge2': f"{r2:.4f}",
                    'rougeL': f"{rL:.4f}",
                    'weighted_avg': f"{weighted_avg:.4f}",
                    'length_penalty': f"{length_penalty:.2f}"
                }
                results_to_store['final_score'] = get_rouge_score(final_rouge_score)

            session['all_results']['rouge'] = results_to_store

        elif metric == 'bleu':
            gen_text = request.form.get('bleu_generated', '').strip()
            ref_text1 = request.form.get('bleu_reference1', '').strip()
            ref_text2 = request.form.get('bleu_reference2', '').strip()
            session['input_texts']['bleu_generated'] = gen_text
            session['input_texts']['bleu_reference1'] = ref_text1
            session['input_texts']['bleu_reference2'] = ref_text2

            if not gen_text or not ref_text1 or not ref_text2:
                results_to_store['error'] = "Please enter the generated translation and both reference translations."
            else:
                try:
                    try:
                        # sentence_score expects a flat list of reference strings;
                        # .score is on a 0-100 scale, hence the division.
                        bleu_score = bleu.sentence_score(gen_text, [ref_text1, ref_text2]).score / 100
                    except Exception:
                        try:
                            # corpus_score expects one reference stream per reference.
                            bleu_score = bleu.corpus_score([gen_text], [[ref_text1], [ref_text2]]).score / 100
                        except Exception:
                            from sacrebleu import sentence_bleu
                            bleu_score = sentence_bleu(gen_text, [ref_text1, ref_text2]).score / 100

                    results_to_store['score_value'] = bleu_score
                    results_to_store['score_display'] = f"{bleu_score:.4f}"
                    results_to_store['final_score'] = get_bleu_score(bleu_score)

                except Exception as bleu_error:
                    # Last resort: NLTK's smoothed sentence BLEU on whitespace
                    # tokens (already on a 0-1 scale).
                    try:
                        from nltk.translate.bleu_score import sentence_bleu as nltk_bleu
                        from nltk.translate.bleu_score import SmoothingFunction

                        gen_tokens = gen_text.split()
                        ref_tokens1 = ref_text1.split()
                        ref_tokens2 = ref_text2.split()

                        smoothing = SmoothingFunction().method1
                        bleu_score = nltk_bleu([ref_tokens1, ref_tokens2], gen_tokens, smoothing_function=smoothing)

                        results_to_store['score_value'] = bleu_score
                        results_to_store['score_display'] = f"{bleu_score:.4f}"
                        results_to_store['final_score'] = get_bleu_score(bleu_score)

                    except Exception as nltk_error:
                        results_to_store['error'] = f"Error while computing BLEU: {str(bleu_error)[:100]}"

            session['all_results']['bleu'] = results_to_store

        elif metric in ['mmlu', 'truthfulqa', 'drop', 'mbpp_humaneval']:
            generated_text = request.form.get(f'{metric}_generated', '')
            reference_text = request.form.get(f'{metric}_reference', '')
            grade = request.form.get(f'{metric}_grade', '')

            session['input_texts'][f'{metric}_generated'] = generated_text
            session['input_texts'][f'{metric}_reference'] = reference_text

            max_score = 3

            # Korean five-level grade scale: 수 (excellent), 우 (good),
            # 미 (fair), 양 (poor), 가 (fail).
            score_map = {
                '수': 1.0,
                '우': 0.8,
                '미': 0.6,
                '양': 0.4,
                '가': 0.2
            }

            if grade and grade in score_map:
                final_score = max_score * score_map[grade]
                results_to_store['grade'] = grade
                results_to_store['final_score'] = final_score
            else:
                results_to_store['grade'] = None
                results_to_store['final_score'] = 0
                if not grade:
                    results_to_store['error'] = "Please select an evaluation grade."

            session['all_results'][metric] = results_to_store

    except Exception as e:
        results_to_store['error'] = f"Error during evaluation: {e}"
        session['all_results'][metric] = results_to_store
        app.logger.error(f"Evaluation error - metric: {metric}, error: {e}")

    session.modified = True
    return redirect(url_for('index', _anchor=metric))
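
# Example request against this route (hypothetical values), assuming the app
# is running locally on port 7860; a cookie jar is needed so the session that
# stores the result survives the redirect back to "/":
#   curl -c /tmp/cj -b /tmp/cj -X POST http://localhost:7860/evaluate \
#        -d "metric=rouge" \
#        -d "rouge_generated=The model summarizes the quarterly report." \
#        -d "rouge_reference=The model produces a summary of the quarterly report."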


@app.route('/report')
def report():
    all_results = session.get('all_results', {})
    input_texts = session.get('input_texts', {})
    try:
        target_url = all_results.get('target_url', 'N/A')
        total_score = sum(res.get('final_score', 0) for res in all_results.values() if isinstance(res, dict))
        log_message = f"Report generated - target: {target_url}, total score: {total_score:.2f}/20"
        app.logger.info(log_message)
    except Exception as e:
        app.logger.error(f"Error while writing the report log: {e}")
    return render_template('report.html', all_results=all_results, input_texts=input_texts)


@app.route('/reset')
def reset():
    session.pop('all_results', None)
    session.pop('input_texts', None)
    cleanup_memory()
    return redirect(url_for('index'))


@app.route('/memory_status')
def memory_status():
    status = {}
    if torch.cuda.is_available():
        status['gpu_allocated'] = f"{torch.cuda.memory_allocated() / 1024**3:.2f} GB"
        status['gpu_reserved'] = f"{torch.cuda.memory_reserved() / 1024**3:.2f} GB"
    import psutil
    process = psutil.Process()
    status['ram_usage'] = f"{process.memory_info().rss / 1024**3:.2f} GB"
    return status


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=7860)
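
# Run note: the source does not give a filename; assuming it is saved as
# app.py, `python app.py` starts the server on http://localhost:7860. Port
# 7860 is the default expected by Hugging Face Spaces, which also matches the
# writable /tmp/nltk_data path chosen above for read-only container images.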