AlaFalaki committed
Commit 1ff6bfb · 1 Parent(s): 1c9827a

Created using Colab

Files changed (1):
  1. notebooks/06-Evaluate_RAG.ipynb +6 -10
notebooks/06-Evaluate_RAG.ipynb CHANGED
@@ -24,10 +24,10 @@
    "execution_count": 1,
    "metadata": {
     "id": "QPJzr-I9XQ7l",
-    "outputId": "71591538-a161-4a0a-e2c4-057bd2de6941",
     "colab": {
      "base_uri": "https://localhost:8080/"
-    }
+    },
+    "outputId": "71591538-a161-4a0a-e2c4-057bd2de6941"
    },
    "outputs": [
     {
@@ -91,7 +91,7 @@
    "import os\n",
    "\n",
    "# Set the \"OPENAI_API_KEY\" in the Python environment. Will be used by OpenAI client later.\n",
-   "os.environ[\"OPENAI_API_KEY\"] = \"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\""
+   "os.environ[\"OPENAI_API_KEY\"] = \"[YOUR_OPENAI_KEY]\""
   ]
  },
  {
@@ -809,21 +809,20 @@
   }
  ],
  "source": [
-  "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, CorrectnessEvaluator, BatchEvalRunner\n",
+  "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
   "from llama_index.llms.openai import OpenAI\n",
   "\n",
   "llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4o\")\n",
   "\n",
   "faithfulness_evaluator = FaithfulnessEvaluator(llm=llm_gpt4)\n",
   "relevancy_evaluator = RelevancyEvaluator(llm=llm_gpt4)\n",
-  "correctness_evaluator = CorrectnessEvaluator(llm=llm_gpt4)\n",
   "\n",
   "# Run evaluation\n",
   "queries = list(rag_eval_dataset.queries.values())\n",
   "batch_eval_queries = queries[:20]\n",
   "\n",
   "runner = BatchEvalRunner(\n",
-  "{\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator, \"correctness\": correctness_evaluator},\n",
+  "{\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
   "workers=32,\n",
   ")\n",
   "\n",
@@ -838,10 +837,7 @@
   " print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
   "\n",
   " relevancy_score = sum(result.passing for result in eval_results['relevancy']) / len(eval_results['relevancy'])\n",
-  " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n",
-  "\n",
-  " correctness = sum(result.passing for result in eval_results['correctness']) / len(eval_results['correctness'])\n",
-  " print(f\"top_{i} correctness: {correctness}\")\n"
+  " print(f\"top_{i} relevancy_score: {relevancy_score}\")\n"
   ]
  },
  {
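The second hunk replaces a hardcoded OpenAI key with a placeholder. A safer pattern for notebooks is to read the key at runtime rather than storing it in the source; below is a minimal sketch using only the Python standard library (the prompt text and the environment-variable check are illustrative, not part of the notebook):

import os
from getpass import getpass

# Prompt for the key only if it is not already set in the environment.
# getpass hides the typed value, so the key never lands in the saved notebook.
if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass("OpenAI API key: ")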
 
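For context, the cell edited in the last two hunks follows LlamaIndex's batch-evaluation pattern: a BatchEvalRunner fans the queries out to the named evaluators, and each evaluator returns a list of EvaluationResult objects whose passing flags are averaged into a score. A minimal sketch of the surrounding flow, assuming the query_engine and rag_eval_dataset built earlier in the notebook (both sit outside this diff):

import asyncio
from llama_index.core.evaluation import (
    BatchEvalRunner,
    FaithfulnessEvaluator,
    RelevancyEvaluator,
)
from llama_index.llms.openai import OpenAI

llm_gpt4 = OpenAI(temperature=0, model="gpt-4o")

runner = BatchEvalRunner(
    {
        "faithfulness": FaithfulnessEvaluator(llm=llm_gpt4),
        "relevancy": RelevancyEvaluator(llm=llm_gpt4),
    },
    workers=32,  # number of concurrent evaluation calls
)

# Assumes rag_eval_dataset exists, as in the notebook's earlier cells.
batch_eval_queries = list(rag_eval_dataset.queries.values())[:20]

# aevaluate_queries is a coroutine; inside the notebook it is awaited directly.
# It returns a dict mapping each evaluator name to a list of EvaluationResult.
eval_results = asyncio.run(
    runner.aevaluate_queries(query_engine, queries=batch_eval_queries)
)

# Score = fraction of queries the evaluator marked as passing,
# matching the printed top_{i} scores in the cell.
for name, results in eval_results.items():
    print(name, sum(r.passing for r in results) / len(results))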