{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Convert & Optimize model with Optimum\n", "\n", "\n", "Steps:\n", "1. Convert model to ONNX\n", "2. Optimize & quantize model with Optimum\n", "3. Create Custom Handler for Inference Endpoints\n", "\n", "Helpful links:\n", "* [Accelerate Sentence Transformers with Hugging Face Optimum](https://www.philschmid.de/optimize-sentence-transformers)\n", "* [Create Custom Handler Endpoints](https://huggingface.co/docs/inference-endpoints/guides/custom_handler)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup & Installation" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Writing requirements.txt\n" ] } ], "source": [ "%%writefile requirements.txt\n", "optimum[onnxruntime]==1.3.0\n", "mkl-include\n", "mkl" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "!pip install -r requirements.txt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Convert model to ONNX" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "application/vnd.jupyter.widget-view+json": { "model_id": "2920b55a58bb41b78436f64d24b31d27", "version_major": 2, "version_minor": 0 }, "text/plain": [ "Downloading: 0%| | 0.00/612 [00:00 List[List[Dict[str, float]]]:\n", " \"\"\"\n", " Args:\n", " data (:obj:):\n", " includes the input data and the parameters for the inference.\n", " Return:\n", " A :obj:`list`:. 
The list contains the embeddings of the inference inputs\n", " \"\"\"\n", " inputs = data.get(\"inputs\", data)\n", "\n", " # tokenize the input\n", " encoded_inputs = self.tokenizer(inputs, padding=True, truncation=True, return_tensors='pt')\n", " # run the model\n", " outputs = self.model(**encoded_inputs)\n", " # Perform pooling\n", " sentence_embeddings = mean_pooling(outputs, encoded_inputs['attention_mask'])\n", " # Normalize embeddings\n", " sentence_embeddings = F.normalize(sentence_embeddings, p=2, dim=1)\n", " # postprocess the prediction\n", " return {\"embeddings\": sentence_embeddings.tolist()}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Test the custom pipeline with a sample request." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1.55 ms ± 2.04 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)\n" ] } ], "source": [ "from pipeline import PreTrainedPipeline\n", "\n", "# init handler\n", "my_handler = PreTrainedPipeline(path=\".\")\n", "\n", "# prepare sample payload\n", "request = {\"inputs\": \"I am quite excited how this will turn out\"}\n", "\n", "# test the handler\n", "%timeit my_handler(request)\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'embeddings': [[-0.021580450236797333,\n", " 0.021715054288506508,\n", " 0.00979710929095745,\n", " -0.0005379787762649357,\n", " 0.04682469740509987,\n", " -0.013600599952042103,\n", " -0.003064213553443551,\n", " 0.007061154581606388,\n", " 0.026638098061084747,\n", " -0.011613409034907818,\n", " -0.06916121393442154,\n", " 0.061429575085639954,\n", " 0.013463253155350685,\n", " -0.022426923736929893,\n", " 0.04116947948932648,\n", " 0.03925771266222,\n", " 0.014005577191710472,\n", " -0.07909698039293289,\n", " -0.028196798637509346,\n", " -0.003196786157786846,\n", " 0.013688302598893642,\n", " -0.044537559151649475,\n", " -0.04594269394874573,\n", 
-0.04054776579141617,\n", " -0.038281939923763275,\n", " 0.06411226838827133,\n", " -0.013305696658790112,\n", " -0.02935652621090412,\n", " -0.0150306923314929,\n", " -0.0434146448969841,\n", " 0.03218410909175873,\n", " 0.018695568665862083,\n", " -0.012916717678308487,\n", " 0.009855723939836025,\n", " -0.022609280422329903,\n", " -0.08628173172473907,\n", " 0.03853229060769081,\n", " -0.03584187850356102,\n", " 0.05425931513309479,\n", " -0.002929823938757181,\n", " -0.011379950679838657,\n", " -0.15505683422088623,\n", " 0.01120749581605196,\n", " 0.03100745379924774,\n", " 0.043684810400009155,\n", " 0.008617725223302841,\n", " 0.00824501272290945,\n", " -0.01545825693756342,\n", " -0.001658946624957025,\n", " 0.027067873626947403,\n", " -0.019667934626340866,\n", " -0.09459519386291504,\n", " 0.048974718898534775,\n", " -0.02965048886835575,\n", " -0.08003880828619003,\n", " 0.045240651816129684,\n", " -0.012594419531524181,\n", " -0.05546975135803223,\n", " 0.05608676001429558,\n", " -0.04186442866921425,\n", " -0.02615668624639511,\n", " 0.02160278894007206,\n", " 0.03741847351193428,\n", " 0.0008759248885326087,\n", " 0.03592744097113609,\n", " -0.12200205773115158,\n", " 0.06229585036635399,\n", " 0.01601075753569603,\n", " 0.040825288742780685,\n", " -0.08544802665710449,\n", " -0.029977118596434593,\n", " 0.03295058012008667,\n", " 0.05928152799606323,\n", " -0.052630465477705,\n", " 0.020404687151312828,\n", " 0.00725224195048213,\n", " 0.0009453881066292524,\n", " 0.04398864880204201,\n", " 0.071522556245327,\n", " 0.032034359872341156,\n", " 0.038474190980196,\n", " -0.032708171755075455,\n", " -0.011295354925096035,\n", " -0.045965589582920074,\n", " -0.041425369679927826,\n", " 0.0482286661863327,\n", " 0.008450332097709179,\n", " -0.03801262006163597,\n", " -0.0420663058757782,\n", " 0.05417492985725403,\n", " -0.09063713997602463,\n", " -0.007592180278152227,\n", " -0.009322550147771835,\n", " -0.02063363790512085,\n", " 
-0.03594734147191048,\n", " 0.07223387807607651,\n", " -0.03899461403489113,\n", " -0.0934303030371666,\n", " -0.03475493937730789,\n", " 0.09417884796857834,\n", " -0.03771593049168587,\n", " 0.0638294667005539,\n", " 0.032066185027360916,\n", " -0.08843936026096344,\n", " 0.012369371019303799,\n", " -0.03089563362300396,\n", " -0.005824724677950144,\n", " 0.08723752945661545,\n", " 0.02237764000892639,\n", " -0.03896152228116989,\n", " 0.025661000981926918,\n", " -0.005460284650325775,\n", " 0.05766639858484268,\n", " 0.025396188721060753,\n", " -0.03150532767176628,\n", " 0.09431672841310501,\n", " 0.035403359681367874,\n", " 0.09509390592575073,\n", " -0.015979617834091187,\n", " 0.04350188001990318,\n", " 0.046271294355392456,\n", " 0.009891007095575333,\n", " -0.0044189076870679855,\n", " -0.017476193606853485,\n", " 0.015222891233861446,\n", " 0.009962008334696293,\n", " -0.05670330300927162,\n", " -1.8742520903182187e-33,\n", " 0.017962634563446045,\n", " 0.023281103000044823,\n", " -0.013410707004368305,\n", " 0.10924902558326721,\n", " 0.036854133009910583,\n", " -0.039277151226997375,\n", " 0.026224950328469276,\n", " -0.04877658933401108,\n", " -0.0805993378162384,\n", " -0.0030330857262015343,\n", " -0.0028494936414062977,\n", " 0.018921272829174995,\n", " -0.01530009601265192,\n", " 0.1219208613038063,\n", " -0.07319916784763336,\n", " -0.10112590342760086,\n", " 0.006891624070703983,\n", " -0.002260996960103512,\n", " -0.007901495322585106,\n", " 0.017701659351587296,\n", " -0.08319021016359329,\n", " 0.048608407378196716,\n", " -0.05502907559275627,\n", " -0.03751670941710472,\n", " -0.004041539039462805,\n", " 0.07481412589550018,\n", " 0.0022187645081430674,\n", " -0.03369564935564995,\n", " -0.11100229620933533,\n", " 0.01231460366398096,\n", " -0.03582797944545746,\n", " 0.026462607085704803,\n", " -0.03912581503391266,\n", " -0.011205351911485195,\n", " -0.03137337043881416,\n", " 0.0059767672792077065,\n", " -0.1009056344628334,\n", " 
-0.06049555912613869,\n", " 0.021796569228172302,\n", " -0.014793958514928818,\n", " 0.03098255582153797,\n", " -0.00538264773786068,\n", " -0.04653438180685043,\n", " -0.02799016609787941,\n", " 0.023156380280852318,\n", " 0.07959774136543274,\n", " 0.043343499302864075,\n", " 0.02526552602648735,\n", " 0.05564416944980621,\n", " -0.0895266905426979,\n", " 0.02035175822675228,\n", " 0.00761762959882617,\n", " -0.01012750156223774,\n", " 0.10514233261346817,\n", " -0.00832043495029211,\n", " -0.018016740679740906,\n", " 0.01773231290280819,\n", " -0.13199643790721893,\n", " 0.11118609458208084,\n", " 0.0027006398886442184,\n", " 0.035123299807310104,\n", " 0.017120877280831337,\n", " -0.08685944974422455,\n", " 0.014364459551870823,\n", " -0.0697159692645073,\n", " 0.03414931520819664,\n", " 0.051882319152355194,\n", " -0.049169816076755524,\n", " -0.07678680121898651,\n", " 0.03500046953558922,\n", " -0.027233436703681946,\n", " 0.019955039024353027,\n", " -0.035047441720962524,\n", " -0.03964361920952797,\n", " -0.01907966658473015,\n", " 0.05322276055812836,\n", " -0.03573837876319885,\n", " -0.02035624347627163,\n", " 0.03240324929356575,\n", " 0.023124489933252335,\n", " 0.04587593674659729,\n", " 0.006914089433848858,\n", " 0.02254929207265377,\n", " -0.048369478434324265,\n", " 0.07502789050340652,\n", " -0.04454338923096657,\n", " 0.009581719525158405,\n", " -0.08176697790622711,\n", " -0.026596812531352043,\n", " 0.05699768289923668,\n", " 0.03196358308196068,\n", " -0.0818556547164917,\n", " 0.04586222395300865,\n", " 0.026800116524100304,\n", " 0.053372107446193695,\n", " 4.116422800348778e-34,\n", " 0.04144074022769928,\n", " -0.00046204423415474594,\n", " -0.05304589495062828,\n", " 0.006641748361289501,\n", " -0.05266479030251503,\n", " -0.02192983590066433,\n", " 0.010295987129211426,\n", " 0.1503780037164688,\n", " 0.06841202080249786,\n", " 0.012436892837285995,\n", " 0.02130315639078617,\n", " 0.05735220015048981,\n", " 0.020133396610617638,\n", " 
-0.019417081028223038,\n", " 0.018597068265080452,\n", " -0.060950521379709244,\n", " 0.14569053053855896,\n", " 0.046135421842336655,\n", " 0.014004155062139034,\n", " 0.06448501348495483,\n", " -0.03540049120783806,\n", " 0.05386977270245552,\n", " -0.04851151257753372,\n", " 0.04860413447022438,\n", " 0.003418552689254284,\n", " 0.026858657598495483,\n", " 0.08443755656480789,\n", " 0.0688081830739975,\n", " -0.027870699763298035,\n", " -0.02680159918963909,\n", " -0.10730879008769989,\n", " -0.09660787880420685,\n", " -0.010721202939748764,\n", " 0.03249472752213478,\n", " -0.010227357968688011,\n", " -0.005592911038547754,\n", " -0.02233457751572132,\n", " 0.003959502559155226,\n", " -0.0025461087934672832,\n", " -0.07056054472923279,\n", " -0.01288093812763691,\n", " 0.03734854981303215,\n", " -0.0930633544921875,\n", " 0.06263089179992676,\n", " -0.022451557219028473,\n", " 0.011584922671318054,\n", " 0.07056082785129547,\n", " 0.07839607447385788,\n", " -0.03750450536608696,\n", " 0.08674977719783783,\n", " -0.0174140315502882,\n", " 0.037801019847393036,\n", " -0.04431292042136192,\n", " -0.003121826099231839,\n", " -0.04473913460969925,\n", " -0.009062718600034714,\n", " 0.06917019933462143,\n", " -0.07210793346166611,\n", " 0.02439814619719982,\n", " 0.06415946036577225,\n", " -0.11128300428390503,\n", " 0.07395494729280472,\n", " -0.019613103941082954,\n", " -0.0576956532895565,\n", " 0.03607752546668053,\n", " -0.049007922410964966,\n", " -0.00931280292570591,\n", " 0.02782956324517727,\n", " -0.016698531806468964,\n", " 0.04213561490178108,\n", " 0.02651999704539776,\n", " -0.021170292049646378,\n", " -0.10422325879335403,\n", " 0.02582547254860401,\n", " 0.07547233253717422,\n", " -0.07150454074144363,\n", " 0.10658326745033264,\n", " -0.08328848332166672,\n", " -0.006845302879810333,\n", " -0.018662545830011368,\n", " -0.009805584326386452,\n", " 0.035663068294525146,\n", " 0.0027744239196181297,\n", " -0.03721313178539276,\n", " 
0.06117653474211693,\n", " 0.03830438479781151,\n", " -0.01618945226073265,\n", " -0.02423257753252983,\n", " -0.0009939797455444932,\n", " -0.003057157387956977,\n", " -0.07808902114629745,\n", " 0.057173147797584534,\n", " 0.015869930386543274,\n", " 0.01918310485780239,\n", " 0.08144430071115494,\n", " -2.1998719290650115e-08,\n", " -0.025966359302401543,\n", " -0.024850135669112206,\n", " 0.02227822132408619,\n", " 0.0793970599770546,\n", " 0.044460248202085495,\n", " 0.03317498043179512,\n", " 0.03564529865980148,\n", " 0.013410663232207298,\n", " -0.05888325348496437,\n", " -0.0570887066423893,\n", " 0.02409365586936474,\n", " -0.0031824831385165453,\n", " 0.07432717829942703,\n", " 0.00491950660943985,\n", " 0.037177130579948425,\n", " 0.1214393675327301,\n", " -0.02980734035372734,\n", " 0.08316365629434586,\n", " -0.03441021963953972,\n", " -0.05670581012964249,\n", " -0.08702761679887772,\n", " -0.033726878464221954,\n", " 0.09084504842758179,\n", " 0.030235234647989273,\n", " 0.014355660416185856,\n", " 0.008767222985625267,\n", " -0.0827459916472435,\n", " 0.08210321515798569,\n", " -0.061066679656505585,\n", " 0.03521161153912544,\n", " -0.04115701839327812,\n", " 0.014578152447938919,\n", " -0.05554644390940666,\n", " 0.031068438664078712,\n", " -0.08362201601266861,\n", " -0.023382432758808136,\n", " -0.09858708828687668,\n", " 0.017514051869511604,\n", " 0.10520247370004654,\n", " -0.04585810378193855,\n", " -0.03088274411857128,\n", " -0.06560547649860382,\n", " -0.07936973869800568,\n", " 0.038559265434741974,\n", " -0.086161307990551,\n", " -0.07989706099033356,\n", " 0.06426848471164703,\n", " -0.04678329452872276,\n", " -0.005842810496687889,\n", " -9.329108434030786e-05,\n", " 0.005526330322027206,\n", " -0.060696180909872055,\n", " 0.045042477548122406,\n", " 0.020842568948864937,\n", " 0.10796718299388885,\n", " 0.016674820333719254,\n", " -0.03490869328379631,\n", " 0.050079092383384705,\n", " 0.046036623418331146,\n", " 
0.1225607842206955,\n", " 0.03865363076329231,\n", " -0.06910006701946259,\n", " 0.03865937888622284,\n", " 4.1704730392666534e-05]]}" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "my_handler(request)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.12 ('base')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "7a2c4b191d1ae843dde5cb5f4d1f62fa892f6b79b0f9392a84691e890e33c5a4" } } }, "nbformat": 4, "nbformat_minor": 2 }