Created using Colab
notebooks/06-Evaluate_RAG.ipynb
CHANGED
@@ -24,10 +24,10 @@
    "execution_count": 1,
    "metadata": {
     "id": "QPJzr-I9XQ7l",
-    "outputId": "71591538-a161-4a0a-e2c4-057bd2de6941",
     "colab": {
      "base_uri": "https://localhost:8080/"
-    }
+    },
+    "outputId": "71591538-a161-4a0a-e2c4-057bd2de6941"
    },
    "outputs": [
     {
@@ -91,7 +91,7 @@
     "import os\n",
     "\n",
     "# Set the \"OPENAI_API_KEY\" in the Python environment. Will be used by OpenAI client later.\n",
-    "os.environ[\"OPENAI_API_KEY\"] = \"
+    "os.environ[\"OPENAI_API_KEY\"] = \"[YOUR_OPENAI_KEY]\""
    ]
   },
   {
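Note on the placeholder introduced above: rather than pasting a key into the saved notebook, the same environment variable can be set interactively at runtime. A minimal sketch, assuming the notebook is run interactively; `getpass` is standard library and is not part of this diff:

    import os
    from getpass import getpass

    # Prompt for the key when the cell runs, so it never ends up in the committed notebook.
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")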
@@ -809,21 +809,20 @@
      }
     ],
     "source": [
-     "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator,
+     "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
      "from llama_index.llms.openai import OpenAI\n",
      "\n",
      "llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4o\")\n",
      "\n",
      "faithfulness_evaluator = FaithfulnessEvaluator(llm=llm_gpt4)\n",
      "relevancy_evaluator = RelevancyEvaluator(llm=llm_gpt4)\n",
-     "correctness_evaluator = CorrectnessEvaluator(llm=llm_gpt4)\n",
      "\n",
      "# Run evaluation\n",
      "queries = list(rag_eval_dataset.queries.values())\n",
      "batch_eval_queries = queries[:20]\n",
      "\n",
      "runner = BatchEvalRunner(\n",
-     "{\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator
+     "{\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
      "workers=32,\n",
      ")\n",
      "\n",
@@ -838,10 +838,7 @@
      " print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
      "\n",
      " relevancy_score = sum(result.passing for result in eval_results['relevancy']) / len(eval_results['relevancy'])\n",
-     " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
-     "\n",
-     " correctness = sum(result.passing for result in eval_results['correctness']) / len(eval_results['correctness'])\n",
-     " print(f\"top_{i} correctness: {correctness}\")\n"
+     " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n"
     ]
    },
    {
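For context on the scoring lines in the last hunk: the call that produces `eval_results` sits between the two hunks and is not shown in this diff. A rough sketch of how the updated cell likely fits together; the `top_k` values, the loop variable `i`, and the `vector_index` object are assumptions for illustration, not taken from this commit:

    # Evaluate the query engine at several retrieval depths and report the
    # fraction of passing judgements for each metric.
    for i in [2, 4, 6, 8, 10]:
        query_engine = vector_index.as_query_engine(similarity_top_k=i)
        eval_results = runner.evaluate_queries(
            query_engine, queries=batch_eval_queries
        )

        faithfulness_score = sum(
            result.passing for result in eval_results["faithfulness"]
        ) / len(eval_results["faithfulness"])
        print(f"top_{i} faithfulness_score: {faithfulness_score}")

        relevancy_score = sum(
            result.passing for result in eval_results["relevancy"]
        ) / len(eval_results["relevancy"])
        print(f"top_{i} relevancy_score: {relevancy_score}")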