Spaces:
Runtime error
Runtime error
| import json | |
| import numpy as np | |
| from metrics.graph_matching import ( | |
| get_triple_match_f1, | |
| get_graph_match_accuracy, | |
| get_bert_score, | |
| get_bleu_rouge, | |
| split_to_edges, | |
| get_tokens, | |
| get_ged | |
| ) | |
def load_data(gold_path, pred_path):
    """Load gold and predicted triples and pair them up by source text.

    Both files are read as UTF-8 JSON and are expected to decode to a
    sequence of mappings that each carry a ``'text'`` and a ``'triple_list'``
    entry. Only texts that occur in the prediction file are evaluated: every
    prediction is paired with the first gold entry whose ``'text'`` is equal,
    and predictions without a gold counterpart are skipped.

    The paired graphs are meant to feed the evaluation metrics of this
    script: Triple Match F1 (exact triple matching), Graph Match Accuracy
    (graph-structure matching), BERT Score (semantic similarity),
    BLEU & ROUGE (text generation quality) and Graph Edit Distance
    (graph-structure difference).

    Args:
        gold_path: Path of the JSON file holding the reference triples.
        pred_path: Path of the JSON file holding the predicted triples.

    Returns:
        A ``(gold_graphs, pred_graphs)`` tuple of two equally long lists.
        Index ``i`` of each list holds the ``'triple_list'`` of the gold entry
        and of the prediction that share the same text, in prediction order.

    Raises:
        OSError: If either file cannot be opened.
        json.JSONDecodeError: If either file is not valid JSON.
        KeyError: If an entry lacks ``'text'``, or a paired entry lacks
            ``'triple_list'``.
    """
    with open(gold_path, 'r', encoding='utf-8') as f:
        gold_data = json.load(f)
    with open(pred_path, 'r', encoding='utf-8') as f:
        pred_data = json.load(f)

    # Index the gold entries once so each prediction is resolved with a single
    # lookup instead of a scan over the whole gold set. ``setdefault`` keeps
    # the first entry for a repeated text, matching a first-hit linear search.
    # Text values are used as dict keys, so they must be hashable (strings).
    gold_by_text = {}
    for gold_item in gold_data:
        gold_by_text.setdefault(gold_item['text'], gold_item)

    gold_graphs = []
    pred_graphs = []
    for pred_item in pred_data:
        gold_item = gold_by_text.get(pred_item['text'])
        if gold_item is None:
            # No reference available for this text: leave it out of both lists.
            continue
        gold_graphs.append(gold_item['triple_list'])
        pred_graphs.append(pred_item['triple_list'])

    return gold_graphs, pred_graphs
def evaluate_triples(gold_graphs, pred_graphs):
    """Score predicted triple graphs against gold graphs and print the results.

    Two metric families are currently active: Triple Match (precision, recall,
    F1 from ``get_triple_match_f1``) and BERT Score (computed by
    ``get_bert_score`` on the edges produced by ``split_to_edges``, then
    averaged with ``.mean()``). Each value is printed with four decimals.

    Args:
        gold_graphs: Reference graphs, passed first to the metric helpers.
        pred_graphs: Predicted graphs, aligned index-by-index with
            ``gold_graphs`` (presumably as returned by ``load_data``).

    Returns:
        A dict with two entries, ``'triple_match'`` and ``'bert_score'``,
        each mapping ``'precision'``, ``'recall'`` and ``'f1'`` to its value.
    """
    print("开始评估...")
    print("=" * 50)

    # 1. Triple Match F1
    tm_precision, tm_recall, tm_f1 = get_triple_match_f1(gold_graphs, pred_graphs)
    print("Triple Match")
    print(f"精确率: {tm_precision:.4f}, 召回率: {tm_recall:.4f}, F1: {tm_f1:.4f}")

    # 2. Graph Match Accuracy is disabled. It used
    #    get_graph_match_accuracy(pred_graphs, gold_graphs) and was reported
    #    under the 'graph_acc' key.

    # 3. BERT Score, computed on the edge-level view of both graph lists.
    edges_gold = split_to_edges(gold_graphs)
    edges_pred = split_to_edges(pred_graphs)
    bs_precisions, bs_recalls, bs_f1s = get_bert_score(edges_gold, edges_pred)
    print("BERT Score:")
    # Each mean is computed once and reused for the printout and the result.
    bs_precision = bs_precisions.mean()
    print(f"- Precision: {bs_precision:.4f}")
    bs_recall = bs_recalls.mean()
    print(f"- Recall: {bs_recall:.4f}")
    bs_f1 = bs_f1s.mean()
    print(f"- F1: {bs_f1:.4f}")

    # 4. BLEU & ROUGE are disabled. They used get_tokens(gold_edges,
    #    pred_edges) followed by get_bleu_rouge(gold_tokens, pred_tokens,
    #    gold_edges, pred_edges) and were reported under 'bleu' / 'rouge'.
    # 5. Graph edit distance is disabled. It averaged get_ged(gold, pred)
    #    over the zipped graph pairs and was reported under the 'ged' key.

    triple_match = {'precision': tm_precision, 'recall': tm_recall, 'f1': tm_f1}
    bert_score = {'precision': bs_precision, 'recall': bs_recall, 'f1': bs_f1}
    return {'triple_match': triple_match, 'bert_score': bert_score}
if __name__ == '__main__':
    # Script entry point: evaluate every prediction file listed in
    # ``model_paths`` against the gold file and append a report per file to a
    # shared text file.

    # Gold (reference) triples used for every model.
    gold_path = './data/GT_500.json'

    # Report file. It is opened in append mode, so reports from earlier runs
    # are kept and new ones are added at the end.
    save_path = 'F:/GeoLLM/output/output_result/Task1/Result_Task1.txt'

    # Prediction files to evaluate. Uncomment entries to include them.
    model_paths = [
        # # gpt-3.5
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/old/gpt-3.5-turbo.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/gpt-3p5-turbo.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/gpt-3p5-turbo.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/gpt-3p5-turbo.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/gpt-3p5-turbo.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/gpt-3p5-turbo.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/gpt-3p5-turbo.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/gpt-3p5-turbo.json',  # knowledge-guided one-shot
        # # gpt-4o
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/old/gpt-4o.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/gpt-4o.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/gpt-4o.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/gpt-4o.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/gpt-4o.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/gpt-4o.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/gpt-4o.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/gpt-4o.json',  # knowledge-guided one-shot
        # # gemini-1p5-pro-002
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/gemini-1p5-pro-002.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/gemini-1p5-pro-002.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/gemini-1p5-pro-002.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/gemini-1p5-pro-002.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/gemini-1p5-pro-002.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/gemini-1p5-pro-002.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/gemini-1p5-pro-002.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/gemini-1p5-pro-002.json',  # knowledge-guided one-shot
        # # claude-3-5-haiku-20241022
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/claude-3-5-haiku-20241022.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/claude-3-5-haiku-20241022.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/claude-3-5-haiku-20241022.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/claude-3-5-haiku-20241022.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/claude-3-5-haiku-20241022.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/claude-3-5-haiku-20241022.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/claude-3-5-haiku-20241022.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/claude-3-5-haiku-20241022.json',  # knowledge-guided one-shot
        # # deepseek-ai
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/deepseek-ai/DeepSeek-V3.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/deepseek-ai/DeepSeek-V3.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/deepseek-ai/DeepSeek-V3.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/deepseek-ai/DeepSeek-V3.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/deepseek-ai/DeepSeek-V3.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/deepseek-ai/DeepSeek-V3.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/deepseek-ai/DeepSeek-V3.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/deepseek-ai/DeepSeek-V3.json',  # knowledge-guided one-shot
        # # R1
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/deepseek-ai/DeepSeek-R1.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/deepseek-ai/DeepSeek-R1.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/deepseek-ai/DeepSeek-R1.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/deepseek-ai/DeepSeek-R1.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/deepseek-ai/DeepSeek-R1.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/deepseek-ai/DeepSeek-R1.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/deepseek-ai/DeepSeek-R1.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/deepseek-ai/DeepSeek-R1.json',  # knowledge-guided one-shot
        # # meta-llama
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/meta-llama/Meta-Llama-3p1-405B-Instruct.json',  # knowledge-guided one-shot
        # # Qwen
        # 'F:/GeoLLM/output/output_result/Task1/nomal/zero_shot/Qwen/Qwen2p5-72B-Instruct.json',  # zero-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/one_shot/Qwen/Qwen2p5-72B-Instruct.json',  # one-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/two_shot/Qwen/Qwen2p5-72B-Instruct.json',  # two-shot
        # 'F:/GeoLLM/output/output_result/Task1/nomal/three_shot/Qwen/Qwen2p5-72B-Instruct.json',  # three-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/one_shot/Qwen/Qwen2p5-72B-Instruct.json',  # KNN one-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/two_shot/Qwen/Qwen2p5-72B-Instruct.json',  # KNN two-shot
        # 'F:/GeoLLM/output/output_result/Task1/knn/three_shot/Qwen/Qwen2p5-72B-Instruct.json',  # KNN three-shot
        # 'F:/GeoLLM/output/output_result/Task1/Knowledge-guided/one_shot/Qwen/Qwen2p5-72B-Instruct.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/gpt-3.5-turbo_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/gpt-3.5-turbo_0407.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/gpt-3.5-turbo_old.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/gpt-4o_konwledge_tri.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/gemini-1.5-pro-002_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/claude-3-5-haiku-20241022_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/deepseek-ai/DeepSeek-V3_0420.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_only_tri/deepseek-ai/DeepSeek-V3.json',  # knowledge-guided one-shot
        'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-R1_guide.json',  # knowledge-guided one-shot
        'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-R1_one_shot.json',  # knowledge-guided one-shot
        'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_only_tri/deepseek-ai/DeepSeek-R1.json',
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/meta-llama/Meta-Llama-3.1-405B-Instruct_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/one_shot/Qwen/Qwen2.5-72B-Instruct_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/gpt-3.5-turbo_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/gpt-4o_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/gemini-1.5-pro-002_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/claude-3-5-haiku-20241022_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-V3_one_shot.json',  # knowledge-guided one-shot
        # # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-R1.json',  # knowledge-guided one-shot
        # # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-R1_one_shot.json'
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/meta-llama/Meta-Llama-3.1-405B-Instruct_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/Qwen/Qwen2.5-72B-Instruct_one_shot.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/gpt-3.5-turbo.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/gpt-4o.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/gemini-1.5-pro-002.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/claude-3-5-haiku-20241022.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-V3.json',  # knowledge-guided one-shot
        # # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-R1.json',  # knowledge-guided one-shot
        # # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/deepseek-ai/DeepSeek-R1_one_shot.json'
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/meta-llama/Meta-Llama-3.1-405B-Instruct.json',  # knowledge-guided one-shot
        # 'F:/GeoLLM/output/Knowledge-guided_rerun/nomal_all/Qwen/Qwen2.5-72B-Instruct.json',  # knowledge-guided one-shot
    ]

    # Compare the performance of the listed models.
    print("各模型评估结果:")

    # Results of every run, keyed by the full prediction path: the same file
    # name occurs in several directories, so the bare model name is not unique.
    all_results = {}

    for pred_path in model_paths:
        # Display name = file name without its final extension. Only the last
        # dot is split off so dotted names such as "gpt-3.5-turbo" stay whole.
        model_name = pred_path.split('/')[-1].rsplit('.', 1)[0]
        print(f"\n{model_name}模型:")

        gold_graphs, pred_graphs = load_data(gold_path, pred_path)
        results = evaluate_triples(gold_graphs, pred_graphs)
        all_results[pred_path] = results

        # Build the whole report first so a formatting error cannot leave a
        # half-written entry in the shared results file.
        triple_match = results['triple_match']
        bert_score = results['bert_score']
        report = (
            f"{pred_path}: \n"
            "Triple Match: \n"
            f"精确率: {triple_match['precision']:.4f}, 召回率: {triple_match['recall']:.4f}, F1: {triple_match['f1']:.4f}\n"
            "BERT Score: \n"
            f"- Precision: {bert_score['precision']:.4f}\n"
            f"- Recall: {bert_score['recall']:.4f}\n"
            f"- F1: {bert_score['f1']:.4f}\n\n"
        )
        with open(save_path, 'a', encoding='utf-8') as f:
            f.write(report)