ragtest-sakimilo

Sleeping

App Files Files Community

lingyit1108 committed on Jan 30

Commit

5a39f92

•

1 Parent(s): 06f450b

added fine-tuning notebook example

Browse files

Files changed (1) hide show

notebooks/fine-tuning-embedding-model.ipynb +1114 -15

notebooks/fine-tuning-embedding-model.ipynb CHANGED Viewed

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "ca2c990f-5215-4ab9-8143-1d79db28edc6",
    "metadata": {},
    "outputs": [],
@@ -16,7 +16,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "2c535ad7-7846-4bef-8ba8-33e182490c3d",
    "metadata": {},
    "outputs": [],
@@ -30,7 +30,33 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "12527049-a5cb-423c-8de5-099aee970c85",
    "metadata": {},
    "outputs": [],
@@ -40,10 +66,18 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "abde5e6c-3474-460c-9fac-4f3352c38b53",
    "metadata": {},
-   "outputs": [],
    "source": [
     "import llama_index\n",
     "print(llama_index.__version__)"
@@ -59,7 +93,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "978cf71f-1ce7-4598-92fe-18fe22ca37c6",
    "metadata": {},
    "outputs": [],
@@ -81,7 +115,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "26f614c8-eb45-4cc1-b067-2c7299587982",
    "metadata": {},
    "outputs": [],
@@ -114,7 +148,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "84cc4308-8ac4-4eba-9478-b81d5b645c48",
    "metadata": {},
    "outputs": [],
@@ -150,7 +184,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "8f17c832-e9ae-477b-8bf7-a9c8410f1ed8",
    "metadata": {},
    "outputs": [],
@@ -159,23 +193,67 @@
     "    train_dataset,\n",
     "    model_id=\"BAAI/bge-small-en-v1.5\",\n",
     "    model_output_path=\"test_model\",\n",
-    "    val_dataset=val_dataset,\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "a6498d0b-da9a-4f7f-8c85-c9bf4d772c72",
    "metadata": {},
-   "outputs": [],
    "source": [
     "finetune_engine.finetune()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "e057b405-aa0e-4e78-91e0-9bf40f01c1a9",
    "metadata": {},
    "outputs": [],
@@ -185,10 +263,21 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "72d9f97a-0902-4e65-8459-b34613e419f6",
    "metadata": {},
-   "outputs": [],
    "source": [
     "embed_model"
    ]
@@ -200,6 +289,1016 @@
    "metadata": {},
    "outputs": [],
    "source": []
   }
  ],
  "metadata": {

  "cells": [
   {
    "cell_type": "code",
+   "execution_count": 2,
    "id": "ca2c990f-5215-4ab9-8143-1d79db28edc6",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 4,
    "id": "2c535ad7-7846-4bef-8ba8-33e182490c3d",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 19,
+   "id": "25f0c7a3-c52f-4417-aec8-4b6cfbf7a1b5",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.embeddings import OpenAIEmbedding\n",
+    "from llama_index import ServiceContext, VectorStoreIndex\n",
+    "from llama_index.schema import TextNode\n",
+    "from tqdm.notebook import tqdm\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "id": "62f4d7f0-748a-405e-b5f1-6520fd02bedc",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sentence_transformers.evaluation import InformationRetrievalEvaluator\n",
+    "from sentence_transformers import SentenceTransformer\n",
+    "from pathlib import Path"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
    "id": "12527049-a5cb-423c-8de5-099aee970c85",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "abde5e6c-3474-460c-9fac-4f3352c38b53",
    "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "0.9.39\n"
+     ]
+    }
+   ],
    "source": [
     "import llama_index\n",
     "print(llama_index.__version__)"
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "978cf71f-1ce7-4598-92fe-18fe22ca37c6",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "26f614c8-eb45-4cc1-b067-2c7299587982",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "84cc4308-8ac4-4eba-9478-b81d5b645c48",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 11,
    "id": "8f17c832-e9ae-477b-8bf7-a9c8410f1ed8",
    "metadata": {},
    "outputs": [],
     "    train_dataset,\n",
     "    model_id=\"BAAI/bge-small-en-v1.5\",\n",
     "    model_output_path=\"test_model\",\n",
+    "    batch_size=5,\n",
+    "    val_dataset=val_dataset\n",
     ")"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 12,
    "id": "a6498d0b-da9a-4f7f-8c85-c9bf4d772c72",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "e80f94e7c7a84014b3cbf270dde3fcaf",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Epoch:   0%|          | 0/2 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "d02eb3c3b1454494a566557e8b73174f",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Iteration:   0%|          | 0/183 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0d73a19c286e43afa7c12cfb5fb49d34",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Iteration:   0%|          | 0/183 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
    "source": [
     "finetune_engine.finetune()"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 13,
    "id": "e057b405-aa0e-4e78-91e0-9bf40f01c1a9",
    "metadata": {},
    "outputs": [],
   },
   {
    "cell_type": "code",
+   "execution_count": 14,
    "id": "72d9f97a-0902-4e65-8459-b34613e419f6",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "HuggingFaceEmbedding(model_name='test_model', embed_batch_size=10, callback_manager=<llama_index.callbacks.base.CallbackManager object at 0x3c7fadca0>, tokenizer_name='test_model', max_length=512, pooling=<Pooling.CLS: 'cls'>, normalize=True, query_instruction=None, text_instruction=None, cache_folder=None)"
+      ]
+     },
+     "execution_count": 14,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
    "source": [
     "embed_model"
    ]
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dad7589f-4855-4432-b710-01aff9c134ee",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "id": "ac4a1a5b-974d-452e-8507-0950c962f9b2",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate(\n",
+    "    dataset,\n",
+    "    embed_model,\n",
+    "    top_k=5,\n",
+    "    verbose=False,\n",
+    "):\n",
+    "    \"\"\"Compute per-query top-k retrieval hits for an embedding model.\n",
+    "\n",
+    "    Indexes every entry of ``dataset.corpus`` in a ``VectorStoreIndex``\n",
+    "    built with ``embed_model``, retrieves ``top_k`` nodes for each entry\n",
+    "    of ``dataset.queries`` and records whether a relevant document id\n",
+    "    from ``dataset.relevant_docs`` is among the retrieved node ids.\n",
+    "\n",
+    "    Args:\n",
+    "        dataset: object exposing ``corpus``, ``queries`` and\n",
+    "            ``relevant_docs``; the first two are iterated with ``.items()``\n",
+    "            and ``relevant_docs`` is indexed by query id.\n",
+    "        embed_model: passed to ``ServiceContext.from_defaults``.\n",
+    "        top_k: number of nodes retrieved per query.\n",
+    "        verbose: accepted for interface compatibility; currently unused.\n",
+    "\n",
+    "    Returns:\n",
+    "        A list with one dict per query holding ``is_hit``, ``retrieved``\n",
+    "        (retrieved node ids), ``expected`` (first relevant doc id, or\n",
+    "        ``None`` when the query has none) and ``query`` (the query *id*,\n",
+    "        not its text).\n",
+    "    \"\"\"\n",
+    "    corpus = dataset.corpus\n",
+    "    queries = dataset.queries\n",
+    "    relevant_docs = dataset.relevant_docs\n",
+    "\n",
+    "    service_context = ServiceContext.from_defaults(embed_model=embed_model)\n",
+    "    nodes = [TextNode(id_=id_, text=text) for id_, text in corpus.items()]\n",
+    "    index = VectorStoreIndex(\n",
+    "        nodes, service_context=service_context, show_progress=True\n",
+    "    )\n",
+    "    retriever = index.as_retriever(similarity_top_k=top_k)\n",
+    "\n",
+    "    eval_results = []\n",
+    "    for query_id, query in tqdm(queries.items()):\n",
+    "        retrieved_nodes = retriever.retrieve(query)\n",
+    "        retrieved_ids = [node.node.node_id for node in retrieved_nodes]\n",
+    "        expected_ids = list(relevant_docs[query_id])\n",
+    "        # A hit is any relevant doc in the top-k; with exactly one relevant\n",
+    "        # doc this is the same check as comparing against the first id only.\n",
+    "        is_hit = any(doc_id in retrieved_ids for doc_id in expected_ids)\n",
+    "\n",
+    "        eval_result = {\n",
+    "            \"is_hit\": is_hit,\n",
+    "            \"retrieved\": retrieved_ids,\n",
+    "            # keep the original column: the first relevant doc id\n",
+    "            \"expected\": expected_ids[0] if expected_ids else None,\n",
+    "            \"query\": query_id,\n",
+    "        }\n",
+    "        eval_results.append(eval_result)\n",
+    "    return eval_results"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "id": "a53cf893-ce9f-4d9d-ad4a-e9e17fb058d3",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def evaluate_st(\n",
+    "    dataset,\n",
+    "    model_id,\n",
+    "    name,\n",
+    "    output_path=\"results/\",\n",
+    "):\n",
+    "    \"\"\"Score a SentenceTransformer model with InformationRetrievalEvaluator.\n",
+    "\n",
+    "    Args:\n",
+    "        dataset: object exposing ``queries``, ``corpus`` and\n",
+    "            ``relevant_docs``, forwarded to the evaluator in that order.\n",
+    "        model_id: model name or local path given to ``SentenceTransformer``.\n",
+    "        name: evaluator name; the notebook later reads result CSVs whose\n",
+    "            file names contain this value.\n",
+    "        output_path: directory handed to the evaluator for its results;\n",
+    "            created (with parents) if missing. Defaults to ``results/``,\n",
+    "            the location previously hard-coded here.\n",
+    "\n",
+    "    Returns:\n",
+    "        Whatever the evaluator call returns (a float score in the outputs\n",
+    "        recorded in this notebook).\n",
+    "    \"\"\"\n",
+    "    corpus = dataset.corpus\n",
+    "    queries = dataset.queries\n",
+    "    relevant_docs = dataset.relevant_docs\n",
+    "\n",
+    "    evaluator = InformationRetrievalEvaluator(\n",
+    "        queries, corpus, relevant_docs, name=name\n",
+    "    )\n",
+    "    model = SentenceTransformer(model_id)\n",
+    "    Path(output_path).mkdir(exist_ok=True, parents=True)\n",
+    "    return evaluator(model, output_path=output_path)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "703f9350-f7ab-43cc-abdf-055323ef67dd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "57d66621-49e6-4a8a-9ef2-83b2b33e33d7",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b43ad08e-e96d-412b-9a88-14fe3af85b3d",
+   "metadata": {},
+   "source": [
+    "### Using OpenAI Ada embedding"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 21,
+   "id": "91f057aa-4b59-48ea-b3d5-23012a4d487f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "f4bf05fbe14c4c379c0b3e1912b84d36",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating embeddings:   0%|          | 0/100 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
+      "To disable this warning, you can either:\n",
+      "\t- Avoid using `tokenizers` before the fork if possible\n",
+      "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
+     ]
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "4f365d1cab004fe897949e2a3928c457",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/200 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "ada = OpenAIEmbedding()\n",
+    "ada_val_results = evaluate(val_dataset, ada)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 22,
+   "id": "5d2f59c6-75d3-4970-bac3-dfe0eef00efe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_ada = pd.DataFrame(ada_val_results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 24,
+   "id": "7a697cd8-6f39-4d5b-84f4-f08cf58adc4a",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>is_hit</th>\n",
+       "      <th>retrieved</th>\n",
+       "      <th>expected</th>\n",
+       "      <th>query</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>False</td>\n",
+       "      <td>[5b9cd986-33dc-46f1-abae-e4e1dc9e3629, c3c1804...</td>\n",
+       "      <td>6a756f03-638d-480d-8222-1a6bf3790e3c</td>\n",
+       "      <td>011d84b2-0c26-4c5c-89d1-2a85498f30e0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[6a756f03-638d-480d-8222-1a6bf3790e3c, c3c1804...</td>\n",
+       "      <td>6a756f03-638d-480d-8222-1a6bf3790e3c</td>\n",
+       "      <td>70c5ddd7-eb86-4a41-af70-a23d2392f48d</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[c83dbd8a-7e62-445e-8c12-a8ad604ff65e, 2177824...</td>\n",
+       "      <td>c83dbd8a-7e62-445e-8c12-a8ad604ff65e</td>\n",
+       "      <td>a8f4290a-1281-4272-aab9-bf089954a45e</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[c83dbd8a-7e62-445e-8c12-a8ad604ff65e, 2177824...</td>\n",
+       "      <td>c83dbd8a-7e62-445e-8c12-a8ad604ff65e</td>\n",
+       "      <td>c1ef991a-1cc6-4dbf-b179-2df688c84301</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[21778248-2ed9-4147-bdb0-a60337a1a599, c83dbd8...</td>\n",
+       "      <td>21778248-2ed9-4147-bdb0-a60337a1a599</td>\n",
+       "      <td>1ce25e78-c1e1-487e-9455-9418baa0b60c</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   is_hit                                          retrieved  \\\n",
+       "0   False  [5b9cd986-33dc-46f1-abae-e4e1dc9e3629, c3c1804...   \n",
+       "1    True  [6a756f03-638d-480d-8222-1a6bf3790e3c, c3c1804...   \n",
+       "2    True  [c83dbd8a-7e62-445e-8c12-a8ad604ff65e, 2177824...   \n",
+       "3    True  [c83dbd8a-7e62-445e-8c12-a8ad604ff65e, 2177824...   \n",
+       "4    True  [21778248-2ed9-4147-bdb0-a60337a1a599, c83dbd8...   \n",
+       "\n",
+       "                               expected                                 query  \n",
+       "0  6a756f03-638d-480d-8222-1a6bf3790e3c  011d84b2-0c26-4c5c-89d1-2a85498f30e0  \n",
+       "1  6a756f03-638d-480d-8222-1a6bf3790e3c  70c5ddd7-eb86-4a41-af70-a23d2392f48d  \n",
+       "2  c83dbd8a-7e62-445e-8c12-a8ad604ff65e  a8f4290a-1281-4272-aab9-bf089954a45e  \n",
+       "3  c83dbd8a-7e62-445e-8c12-a8ad604ff65e  c1ef991a-1cc6-4dbf-b179-2df688c84301  \n",
+       "4  21778248-2ed9-4147-bdb0-a60337a1a599  1ce25e78-c1e1-487e-9455-9418baa0b60c  "
+      ]
+     },
+     "execution_count": 24,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_ada[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "id": "3f7186fb-f392-4531-8959-25161e3905e4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.955, 200)"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hit_rate_ada = df_ada[\"is_hit\"].mean()\n",
+    "hit_rate_ada, len(df_ada)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d044399a-e55b-40b7-a09d-6fb838383bfa",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "66746f3e-638a-432c-a38d-7cb99d2093f7",
+   "metadata": {},
+   "source": [
+    "### Using BAAI bge-small model without fine-tuning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 26,
+   "id": "b2905831-0eb9-4ea7-a0b9-5db286b0965e",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "784a67a3d51a400cad53c52bb16121fc",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "1c0edb74b4154cb49931180def479320",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "model.safetensors:   0%|          | 0.00/133M [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "af9cb2f4d3934e9a991969f0083fa495",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "2370d77040d94ffb9a4d8ca2f45faa97",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0b7c293a142d4eaf91673c17222d232a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "tokenizer.json:   0%|          | 0.00/711k [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "7fcb86d759084084a8e41aec12738e19",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "ab4d747b58f74fdb86481b7f936bf0c4",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating embeddings:   0%|          | 0/100 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "baa0bb9ae0da4dfc86c20308477415fa",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/200 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "bge = \"local:BAAI/bge-small-en-v1.5\"\n",
+    "bge_val_results = evaluate(val_dataset, bge)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 28,
+   "id": "4e66270d-d3f6-429e-9e48-e8062866aa02",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_bge = pd.DataFrame(bge_val_results)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "id": "698c1eb7-eba4-4383-98aa-931fc4ad56a4",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>is_hit</th>\n",
+       "      <th>retrieved</th>\n",
+       "      <th>expected</th>\n",
+       "      <th>query</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>False</td>\n",
+       "      <td>[69a5696d-0c0e-482a-b6a9-f7b87f19945f, fa650c7...</td>\n",
+       "      <td>6a756f03-638d-480d-8222-1a6bf3790e3c</td>\n",
+       "      <td>011d84b2-0c26-4c5c-89d1-2a85498f30e0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[6a756f03-638d-480d-8222-1a6bf3790e3c, d89a649...</td>\n",
+       "      <td>6a756f03-638d-480d-8222-1a6bf3790e3c</td>\n",
+       "      <td>70c5ddd7-eb86-4a41-af70-a23d2392f48d</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[c83dbd8a-7e62-445e-8c12-a8ad604ff65e, 2177824...</td>\n",
+       "      <td>c83dbd8a-7e62-445e-8c12-a8ad604ff65e</td>\n",
+       "      <td>a8f4290a-1281-4272-aab9-bf089954a45e</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[c83dbd8a-7e62-445e-8c12-a8ad604ff65e, ad2e3eb...</td>\n",
+       "      <td>c83dbd8a-7e62-445e-8c12-a8ad604ff65e</td>\n",
+       "      <td>c1ef991a-1cc6-4dbf-b179-2df688c84301</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>True</td>\n",
+       "      <td>[21778248-2ed9-4147-bdb0-a60337a1a599, c83dbd8...</td>\n",
+       "      <td>21778248-2ed9-4147-bdb0-a60337a1a599</td>\n",
+       "      <td>1ce25e78-c1e1-487e-9455-9418baa0b60c</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   is_hit                                          retrieved  \\\n",
+       "0   False  [69a5696d-0c0e-482a-b6a9-f7b87f19945f, fa650c7...   \n",
+       "1    True  [6a756f03-638d-480d-8222-1a6bf3790e3c, d89a649...   \n",
+       "2    True  [c83dbd8a-7e62-445e-8c12-a8ad604ff65e, 2177824...   \n",
+       "3    True  [c83dbd8a-7e62-445e-8c12-a8ad604ff65e, ad2e3eb...   \n",
+       "4    True  [21778248-2ed9-4147-bdb0-a60337a1a599, c83dbd8...   \n",
+       "\n",
+       "                               expected                                 query  \n",
+       "0  6a756f03-638d-480d-8222-1a6bf3790e3c  011d84b2-0c26-4c5c-89d1-2a85498f30e0  \n",
+       "1  6a756f03-638d-480d-8222-1a6bf3790e3c  70c5ddd7-eb86-4a41-af70-a23d2392f48d  \n",
+       "2  c83dbd8a-7e62-445e-8c12-a8ad604ff65e  a8f4290a-1281-4272-aab9-bf089954a45e  \n",
+       "3  c83dbd8a-7e62-445e-8c12-a8ad604ff65e  c1ef991a-1cc6-4dbf-b179-2df688c84301  \n",
+       "4  21778248-2ed9-4147-bdb0-a60337a1a599  1ce25e78-c1e1-487e-9455-9418baa0b60c  "
+      ]
+     },
+     "execution_count": 29,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_bge[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "id": "9b1cb546-4605-4c48-bf4e-df812db97f13",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(0.915, 200)"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hit_rate_bge = df_bge[\"is_hit\"].mean()\n",
+    "hit_rate_bge, len(df_bge)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "7dd69ad1-2153-4df0-93f7-807fc289d3fd",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 31,
+   "id": "1b12ca3d-6ca2-41f6-9ddb-b12b9354ca83",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.7955697668171072"
+      ]
+     },
+     "execution_count": 31,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "evaluate_st(val_dataset, \"BAAI/bge-small-en-v1.5\", name=\"bge\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6023382b-0ff5-4d60-aeac-ad523153f943",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "adf35a2a-3bb7-4251-9521-f35346a7c6e6",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "b3d290c2-784f-4c41-a258-e11d2c5117e7",
+   "metadata": {},
+   "source": [
+    "### Using BAAI bge-small model with `fine-tuning`"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "id": "bd42b288-1f1f-41aa-9fd4-1ae4b1df462b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "47dbb97a78c04f7f8fc1264c1013b5ea",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "Generating embeddings:   0%|          | 0/100 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    },
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "31c9e93debe34cc790bf32e579134a1a",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/200 [00:00<?, ?it/s]"
+      ]
+     },
+     "metadata": {},
+     "output_type": "display_data"
+    }
+   ],
+   "source": [
+    "finetuned = \"local:test_model\"\n",
+    "val_results_finetuned = evaluate(val_dataset, finetuned)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "id": "b1d7112d-b1b8-47db-8a4b-6c024ef99dd6",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_finetuned = pd.DataFrame(val_results_finetuned)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "id": "62a4dd29-0631-4c5b-88e1-be43d48e1043",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.97"
+      ]
+     },
+     "execution_count": 34,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "hit_rate_finetuned = df_finetuned[\"is_hit\"].mean()\n",
+    "hit_rate_finetuned"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 35,
+   "id": "4332594b-c861-40fb-a58b-ba36717d0519",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "0.8573385846534823"
+      ]
+     },
+     "execution_count": 35,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "evaluate_st(val_dataset, \"test_model\", name=\"finetuned\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "b0003812-84a2-4ebd-9372-07bf874a486b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "ae7eb6ff-181b-42c8-975c-ca3320158698",
+   "metadata": {},
+   "source": [
+    "### Summary"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 36,
+   "id": "3ca46cff-b186-463a-847d-a86c310268ec",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_ada[\"model\"] = \"ada\"\n",
+    "df_bge[\"model\"] = \"bge\"\n",
+    "df_finetuned[\"model\"] = \"fine_tuned\""
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 37,
+   "id": "d1d3053e-2395-48a0-af59-fd27180e1e7b",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>is_hit</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>model</th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>ada</th>\n",
+       "      <td>0.955</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>bge</th>\n",
+       "      <td>0.915</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fine_tuned</th>\n",
+       "      <td>0.970</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            is_hit\n",
+       "model             \n",
+       "ada          0.955\n",
+       "bge          0.915\n",
+       "fine_tuned   0.970"
+      ]
+     },
+     "execution_count": 37,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df_all = pd.concat([df_ada, df_bge, df_finetuned])\n",
+    "# Select the column explicitly: mean(\"is_hit\") only worked because the\n",
+    "# string was taken as a truthy positional `numeric_only` flag.\n",
+    "df_all.groupby(\"model\")[[\"is_hit\"]].mean()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "72575c28-a221-4967-8f04-9579dcefa8f8",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
+   "id": "032cac38-c856-4aeb-9bbb-6d70ed53c614",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "df_st_bge = pd.read_csv(\n",
+    "    \"results/Information-Retrieval_evaluation_bge_results.csv\"\n",
+    ")\n",
+    "df_st_finetuned = pd.read_csv(\n",
+    "    \"results/Information-Retrieval_evaluation_finetuned_results.csv\"\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "a509f239-8b28-4d0a-9101-c8de91c7943b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 39,
+   "id": "d2975262-c486-4a9a-a61f-ea535203a0f3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>epoch</th>\n",
+       "      <th>steps</th>\n",
+       "      <th>cos_sim-Accuracy@1</th>\n",
+       "      <th>cos_sim-Accuracy@3</th>\n",
+       "      <th>cos_sim-Accuracy@5</th>\n",
+       "      <th>cos_sim-Accuracy@10</th>\n",
+       "      <th>cos_sim-Precision@1</th>\n",
+       "      <th>cos_sim-Recall@1</th>\n",
+       "      <th>cos_sim-Precision@3</th>\n",
+       "      <th>cos_sim-Recall@3</th>\n",
+       "      <th>...</th>\n",
+       "      <th>dot_score-Recall@1</th>\n",
+       "      <th>dot_score-Precision@3</th>\n",
+       "      <th>dot_score-Recall@3</th>\n",
+       "      <th>dot_score-Precision@5</th>\n",
+       "      <th>dot_score-Recall@5</th>\n",
+       "      <th>dot_score-Precision@10</th>\n",
+       "      <th>dot_score-Recall@10</th>\n",
+       "      <th>dot_score-MRR@10</th>\n",
+       "      <th>dot_score-NDCG@10</th>\n",
+       "      <th>dot_score-MAP@100</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>model</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>bge</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>0.705</td>\n",
+       "      <td>0.865</td>\n",
+       "      <td>0.92</td>\n",
+       "      <td>0.96</td>\n",
+       "      <td>0.705</td>\n",
+       "      <td>0.705</td>\n",
+       "      <td>0.288333</td>\n",
+       "      <td>0.865</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.705</td>\n",
+       "      <td>0.288333</td>\n",
+       "      <td>0.865</td>\n",
+       "      <td>0.184</td>\n",
+       "      <td>0.92</td>\n",
+       "      <td>0.096</td>\n",
+       "      <td>0.96</td>\n",
+       "      <td>0.792935</td>\n",
+       "      <td>0.833595</td>\n",
+       "      <td>0.795570</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>fine_tuned</th>\n",
+       "      <td>-1</td>\n",
+       "      <td>-1</td>\n",
+       "      <td>0.790</td>\n",
+       "      <td>0.900</td>\n",
+       "      <td>0.97</td>\n",
+       "      <td>0.98</td>\n",
+       "      <td>0.790</td>\n",
+       "      <td>0.790</td>\n",
+       "      <td>0.300000</td>\n",
+       "      <td>0.900</td>\n",
+       "      <td>...</td>\n",
+       "      <td>0.790</td>\n",
+       "      <td>0.300000</td>\n",
+       "      <td>0.900</td>\n",
+       "      <td>0.194</td>\n",
+       "      <td>0.97</td>\n",
+       "      <td>0.098</td>\n",
+       "      <td>0.98</td>\n",
+       "      <td>0.856264</td>\n",
+       "      <td>0.886738</td>\n",
+       "      <td>0.857339</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>2 rows × 32 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "            epoch  steps  cos_sim-Accuracy@1  cos_sim-Accuracy@3  \\\n",
+       "model                                                              \n",
+       "bge            -1     -1               0.705               0.865   \n",
+       "fine_tuned     -1     -1               0.790               0.900   \n",
+       "\n",
+       "            cos_sim-Accuracy@5  cos_sim-Accuracy@10  cos_sim-Precision@1  \\\n",
+       "model                                                                      \n",
+       "bge                       0.92                 0.96                0.705   \n",
+       "fine_tuned                0.97                 0.98                0.790   \n",
+       "\n",
+       "            cos_sim-Recall@1  cos_sim-Precision@3  cos_sim-Recall@3  ...  \\\n",
+       "model                                                                ...   \n",
+       "bge                    0.705             0.288333             0.865  ...   \n",
+       "fine_tuned             0.790             0.300000             0.900  ...   \n",
+       "\n",
+       "            dot_score-Recall@1  dot_score-Precision@3  dot_score-Recall@3  \\\n",
+       "model                                                                       \n",
+       "bge                      0.705               0.288333               0.865   \n",
+       "fine_tuned               0.790               0.300000               0.900   \n",
+       "\n",
+       "            dot_score-Precision@5  dot_score-Recall@5  dot_score-Precision@10  \\\n",
+       "model                                                                           \n",
+       "bge                         0.184                0.92                   0.096   \n",
+       "fine_tuned                  0.194                0.97                   0.098   \n",
+       "\n",
+       "            dot_score-Recall@10  dot_score-MRR@10  dot_score-NDCG@10  \\\n",
+       "model                                                                  \n",
+       "bge                        0.96          0.792935           0.833595   \n",
+       "fine_tuned                 0.98          0.856264           0.886738   \n",
+       "\n",
+       "            dot_score-MAP@100  \n",
+       "model                          \n",
+       "bge                  0.795570  \n",
+       "fine_tuned           0.857339  \n",
+       "\n",
+       "[2 rows x 32 columns]"
+      ]
+     },
+     "execution_count": 39,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# Tag each result frame with its model name on a copy (`assign` returns a\n",
+    "# new frame, so the frames loaded earlier are not mutated by this cell),\n",
+    "# stack the two frames and index the combined table by model so the\n",
+    "# metrics of both models can be compared row by row.\n",
+    "df_st_all = pd.concat(\n",
+    "    [\n",
+    "        df_st_bge.assign(model=\"bge\"),\n",
+    "        df_st_finetuned.assign(model=\"fine_tuned\"),\n",
+    "    ]\n",
+    ").set_index(\"model\")\n",
+    "df_st_all"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "6ed2321b-6618-4a2b-9b1c-028425e91b84",
+   "metadata": {},
+   "outputs": [],
+   "source": []
   }
  ],
  "metadata": {