Spaces:

towardsai-tutors
/

ai-tutor-chatbot

Running

App Files Files Community

AlaFalaki committed on Jul 8, 2024

Commit

e68e63d

1 Parent(s): f798896

Created using Colab

Browse files

Files changed (1) hide show

notebooks/06-Evaluate_RAG.ipynb +20 -12

notebooks/06-Evaluate_RAG.ipynb CHANGED Viewed

@@ -21,7 +21,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 17,
       "metadata": {
         "id": "QPJzr-I9XQ7l"
       },
@@ -32,7 +32,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 18,
       "metadata": {
         "id": "riuXwpSPcvWC"
       },
@@ -46,7 +46,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 19,
       "metadata": {
         "id": "km-KQOrgr3VB"
       },
@@ -70,7 +70,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 1,
       "metadata": {
         "id": "SQP87lHczHKc"
       },
@@ -504,7 +504,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 21,
       "metadata": {
         "id": "mNDd5i921Hww"
       },
@@ -632,7 +632,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 22,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
@@ -662,27 +662,35 @@
         "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
         "from llama_index.llms.openai import OpenAI\n",
         "\n",
         "llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4o\")\n",
         "\n",
         "faithfulness_evaluator = FaithfulnessEvaluator(llm=llm_gpt4)\n",
         "relevancy_evaluator = RelevancyEvaluator(llm=llm_gpt4)\n",
         "\n",
-        "# Run evaluation\n",
         "queries = list(rag_eval_dataset.queries.values())\n",
         "batch_eval_queries = queries[:20]\n",
         "\n",
         "runner = BatchEvalRunner(\n",
         "{\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
         "workers=32,\n",
         ")\n",
         "\n",
         "for i in [2, 4, 6, 8, 10]:\n",
-        "    # Set Faithfulness and Relevancy evaluators\n",
         "    query_engine = index.as_query_engine(similarity_top_k=i)\n",
         "\n",
         "    eval_results = await runner.aevaluate_queries(\n",
         "        query_engine, queries=batch_eval_queries\n",
         "    )\n",
         "    faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])\n",
         "    print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
         "\n",
@@ -701,7 +709,7 @@
     },
     {
       "cell_type": "code",
-      "execution_count": 32,
       "metadata": {
         "id": "aUulxzuh1Hwx"
       },
@@ -743,7 +751,7 @@
       "metadata": {
         "id": "CYIjkAP74bly"
       },
-      "execution_count": 33,
       "outputs": []
     },
     {
@@ -758,7 +766,7 @@
         "id": "-3b-bgvA4dAz",
         "outputId": "7ced2102-6372-4794-82ad-1c7e60438088"
       },
-      "execution_count": 34,
       "outputs": [
         {
           "output_type": "execute_result",
@@ -785,7 +793,7 @@
         "id": "KNEhRQAo4dT0",
         "outputId": "4a5d7db9-b399-49ea-c90e-b1e076640a92"
       },
-      "execution_count": 35,
       "outputs": [
         {
           "output_type": "execute_result",

     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "QPJzr-I9XQ7l"
       },
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "riuXwpSPcvWC"
       },
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "km-KQOrgr3VB"
       },
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "SQP87lHczHKc"
       },
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "mNDd5i921Hww"
       },
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "colab": {
           "base_uri": "https://localhost:8080/"
         "from llama_index.core.evaluation import RelevancyEvaluator, FaithfulnessEvaluator, BatchEvalRunner\n",
         "from llama_index.llms.openai import OpenAI\n",
         "\n",
+        "# Define an LLM as a judge\n",
         "llm_gpt4 = OpenAI(temperature=0, model=\"gpt-4o\")\n",
         "\n",
+        "# Initialize the faithfulness and relevancy evaluator objects\n",
         "faithfulness_evaluator = FaithfulnessEvaluator(llm=llm_gpt4)\n",
         "relevancy_evaluator = RelevancyEvaluator(llm=llm_gpt4)\n",
         "\n",
+        "# Extract the questions from the dataset\n",
         "queries = list(rag_eval_dataset.queries.values())\n",
+        "# Limit to the first 20 questions to save cost (!!remove this line in production!!)\n",
         "batch_eval_queries = queries[:20]\n",
         "\n",
+        "# The batch evaluator runs the evaluation in batches\n",
         "runner = BatchEvalRunner(\n",
         "{\"faithfulness\": faithfulness_evaluator, \"relevancy\": relevancy_evaluator},\n",
         "workers=32,\n",
         ")\n",
         "\n",
+        "# Define a for-loop to try different `similarity_top_k` values\n",
         "for i in [2, 4, 6, 8, 10]:\n",
+        "    # Set query engine with different number of returned chunks\n",
         "    query_engine = index.as_query_engine(similarity_top_k=i)\n",
         "\n",
+        "    # Run the evaluation\n",
         "    eval_results = await runner.aevaluate_queries(\n",
         "        query_engine, queries=batch_eval_queries\n",
         "    )\n",
+        "\n",
+        "    # Printing the results\n",
         "    faithfulness_score = sum(result.passing for result in eval_results['faithfulness']) / len(eval_results['faithfulness'])\n",
         "    print(f\"top_{i} faithfulness_score: {faithfulness_score}\")\n",
         "\n",
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "aUulxzuh1Hwx"
       },
       "metadata": {
         "id": "CYIjkAP74bly"
       },
+      "execution_count": null,
       "outputs": []
     },
     {
         "id": "-3b-bgvA4dAz",
         "outputId": "7ced2102-6372-4794-82ad-1c7e60438088"
       },
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",
         "id": "KNEhRQAo4dT0",
         "outputId": "4a5d7db9-b399-49ea-c90e-b1e076640a92"
       },
+      "execution_count": null,
       "outputs": [
         {
           "output_type": "execute_result",