{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Pre-Funding', 'Seed', 'A', 'Mature', 'C', 'Public', 'D',\n", " 'Pre-Seed', 'B', 'Debt Financing', 'F', 'Crowdfunding', 'E'],\n", " dtype=object)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Distinct values of the stage column in df1.\n",
    "# NOTE(review): df1 is assigned by a read_parquet cell further down, so this cell fails on a fresh kernel.\n",
    "df1['stage'].unique()"
   ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0., 3., 1., 4., 2., 5.])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Distinct values of the stage column in df2.\n",
    "# NOTE(review): df2 is assigned by a later cell as well; run that cell first.\n",
    "df2['stage'].unique()"
   ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namedescriptionyeartargetsizestageraisedtagscountrysourcetext_vector_
00.10 of a SecondSmart Indicators for Connected Vehicles2019.0B2B1-10Pre-FundingUndisclosed[connected-vehicles, adas, autonomous-vehicles...Israelhttps://finder.startupnationcentral.org/[-0.031224824488162994, -0.06342269480228424, ...
112trixMath Lessons for Young Kids2012.0B2B, B2C1-10Pre-FundingUndisclosed[sdg, schools, pre-k, serious-games, games, mo...Israelhttps://finder.startupnationcentral.org/[-0.038649097084999084, 0.028091922402381897, ...
21E TherapeuticsNovel RNA-targeting Drugs2021.0B2B51-200Seed$120M[pharmaceuticals, chronic-disease, immunology,...Israelhttps://finder.startupnationcentral.org/[0.04561534896492958, -0.017776092514395714, 0...
31MRoboticsRetail Automation Solutions with Nano Fulfillment2021.0B2B11-50A$25M[omni-channel, ecommerce, climate-tech, artifi...Israelhttps://finder.startupnationcentral.org/[0.0024080690927803516, -0.03042100928723812, ...
41touch.ioPersonal Data Flow Tracking and Data Cataloging2017.0B2B51-200A$16.1M[enterprise-solutions, data-protection, cyber-...Israelhttps://finder.startupnationcentral.org/[-0.01007091999053955, 0.10431888699531555, -0...
....................................
4981YOW HRHuman Resources Engagement and Optimization Pl...2020.0B2B, B2B2C1-10Pre-FundingUndisclosed[content-creators, e-learning, software-applic...Israelhttps://finder.startupnationcentral.org/[0.026961881667375565, 0.002459645736962557, -...
4982Yummi Home FoodMarketplace for Homemade Food2012.0B2C11-50Pre-FundingUndisclosed[ecommerce, p2p, delivery, online-shopping, ma...Israelhttps://finder.startupnationcentral.org/[0.0036857957020401955, 0.03582162782549858, -...
4983Yung-EtgarCustom Mechanized Harvesting Systems1982.0B2B51-200MatureUndisclosed[crops, agtech, harvesting, machinery, sdg, cl...Israelhttps://finder.startupnationcentral.org/[0.027293115854263306, 0.010461761616170406, 0...
4984YuviTalDigital Health and Fitness Solutions for Organ...2017.0B2B, B2C, B2G11-50Pre-FundingUndisclosed[fitness, digital-wallet, discount, mobile-app...Israelhttps://finder.startupnationcentral.org/[0.02851911261677742, 0.05474231392145157, -0....
4985Z-squareMicroendoscope for Minimally Invasive Imaging ...2013.0B2B11-50Seed$10M[endoscopy, medical-devices, minimally-invasiv...Israelhttps://finder.startupnationcentral.org/[0.012587728910148144, -0.07959864288568497, -...
\n", "

4986 rows × 11 columns

\n", "
" ], "text/plain": [ " name description \\\n", "0 0.10 of a Second Smart Indicators for Connected Vehicles \n", "1 12trix Math Lessons for Young Kids \n", "2 1E Therapeutics Novel RNA-targeting Drugs \n", "3 1MRobotics Retail Automation Solutions with Nano Fulfillment \n", "4 1touch.io Personal Data Flow Tracking and Data Cataloging \n", "... ... ... \n", "4981 YOW HR Human Resources Engagement and Optimization Pl... \n", "4982 Yummi Home Food Marketplace for Homemade Food \n", "4983 Yung-Etgar Custom Mechanized Harvesting Systems \n", "4984 YuviTal Digital Health and Fitness Solutions for Organ... \n", "4985 Z-square Microendoscope for Minimally Invasive Imaging ... \n", "\n", " year target size stage raised \\\n", "0 2019.0 B2B 1-10 Pre-Funding Undisclosed \n", "1 2012.0 B2B, B2C 1-10 Pre-Funding Undisclosed \n", "2 2021.0 B2B 51-200 Seed $120M \n", "3 2021.0 B2B 11-50 A $25M \n", "4 2017.0 B2B 51-200 A $16.1M \n", "... ... ... ... ... ... \n", "4981 2020.0 B2B, B2B2C 1-10 Pre-Funding Undisclosed \n", "4982 2012.0 B2C 11-50 Pre-Funding Undisclosed \n", "4983 1982.0 B2B 51-200 Mature Undisclosed \n", "4984 2017.0 B2B, B2C, B2G 11-50 Pre-Funding Undisclosed \n", "4985 2013.0 B2B 11-50 Seed $10M \n", "\n", " tags country \\\n", "0 [connected-vehicles, adas, autonomous-vehicles... Israel \n", "1 [sdg, schools, pre-k, serious-games, games, mo... Israel \n", "2 [pharmaceuticals, chronic-disease, immunology,... Israel \n", "3 [omni-channel, ecommerce, climate-tech, artifi... Israel \n", "4 [enterprise-solutions, data-protection, cyber-... Israel \n", "... ... ... \n", "4981 [content-creators, e-learning, software-applic... Israel \n", "4982 [ecommerce, p2p, delivery, online-shopping, ma... Israel \n", "4983 [crops, agtech, harvesting, machinery, sdg, cl... Israel \n", "4984 [fitness, digital-wallet, discount, mobile-app... Israel \n", "4985 [endoscopy, medical-devices, minimally-invasiv... 
Israel \n", "\n", " source \\\n", "0 https://finder.startupnationcentral.org/ \n", "1 https://finder.startupnationcentral.org/ \n", "2 https://finder.startupnationcentral.org/ \n", "3 https://finder.startupnationcentral.org/ \n", "4 https://finder.startupnationcentral.org/ \n", "... ... \n", "4981 https://finder.startupnationcentral.org/ \n", "4982 https://finder.startupnationcentral.org/ \n", "4983 https://finder.startupnationcentral.org/ \n", "4984 https://finder.startupnationcentral.org/ \n", "4985 https://finder.startupnationcentral.org/ \n", "\n", " text_vector_ \n", "0 [-0.031224824488162994, -0.06342269480228424, ... \n", "1 [-0.038649097084999084, 0.028091922402381897, ... \n", "2 [0.04561534896492958, -0.017776092514395714, 0... \n", "3 [0.0024080690927803516, -0.03042100928723812, ... \n", "4 [-0.01007091999053955, 0.10431888699531555, -0... \n", "... ... \n", "4981 [0.026961881667375565, 0.002459645736962557, -... \n", "4982 [0.0036857957020401955, 0.03582162782549858, -... \n", "4983 [0.027293115854263306, 0.010461761616170406, 0... \n", "4984 [0.02851911261677742, 0.05474231392145157, -0.... \n", "4985 [0.012587728910148144, -0.07959864288568497, -... \n", "\n", "[4986 rows x 11 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "import pandas as pd\n",
    "\n",
    "# Read the encoded frame; the bare name on the last line renders it as the cell result.\n",
    "df1 = pd.read_parquet(path='df_encoded.parquet')\n",
    "df1"
   ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titledescriptionstageindustry_nameurlcountry_slugcity_sluglocationregion_nametext_vector_
0DigipalDigipal is a digital consultancy based in Tbil...0.0Software & Datahttps://www.digipal.agency/georgiatbilisiTbilisi, GeorgiaEurope[0.017287444323301315, 0.06208805367350578, -0...
1BeatBindBeatBind is the industry's long overdue platfo...0.0Social & Leisurehttps://beatbind.io/georgiatbilisiTbilisi, GeorgiaEurope[-0.00438214186578989, -0.051213208585977554, ...
2Smart AcademySmart Academy is a modern educational institut...0.0Edtechhttps://smartacademy.ge/georgiatbilisiTbilisi, GeorgiaEurope[0.0005468669114634395, -0.05331585183739662, ...
3MaxinAIMaxinAI isglobal AI development company that w...0.0Software & Datahttps://www.maxinai.com/#all-industriesgeorgiatbilisiTbilisi, GeorgiaEurope[0.021948501467704773, 0.024166792631149292, -...
4TLANCERTlancer aims to create an unlimited educationa...0.0Edtechhttps://www.tlancer.ge/georgiatbilisiTbilisi, GeorgiaEurope[0.02025573141872883, -0.022812215611338615, -...
.................................
94521OneTwoklkdčksč kdč skdčlsk čdksčd ksčk dčskdčk čdk0.0Software & Datawww.nethrcroatiazagrebZagreb, CroatiaEurope[0.07235302031040192, -0.05674564838409424, -0...
94522TrialfireEngaged trialers turn into customers, engaged ...0.0Software & Datahttp://www.trialfire.comcanadatorontoToronto, CanadaNorth America[0.030764097347855568, 0.054082825779914856, -...
94523ILLUMAGEARILLUMAGEAR’s mission is to illuminate people a...0.0Software & Datahttp://www.illumagear.comunited-statesseattleSeattle, United StatesNorth America[0.015447210520505905, -0.0984775498509407, 0....
94524KnowillageKnowillage lets you add personalization to you...0.0Edtechhttp://www.knowillage.comcanadavancouverVancouver, CanadaNorth America[0.007970919832587242, -0.04347420111298561, -...
94525Iris HolidaysIris Holidays is a full service Kerala tours o...0.0Software & Datahttp://www.irisholidays.comindiakochiKochi, IndiaAsia Pacific[0.0032976483926177025, -0.010843133553862572,...
\n", "

94526 rows × 10 columns

\n", "
" ], "text/plain": [ " title description \\\n", "0 Digipal Digipal is a digital consultancy based in Tbil... \n", "1 BeatBind BeatBind is the industry's long overdue platfo... \n", "2 Smart Academy Smart Academy is a modern educational institut... \n", "3 MaxinAI MaxinAI isglobal AI development company that w... \n", "4 TLANCER Tlancer aims to create an unlimited educationa... \n", "... ... ... \n", "94521 OneTwo klkdčksč kdč skdčlsk čdksčd ksčk dčskdčk čdk \n", "94522 Trialfire Engaged trialers turn into customers, engaged ... \n", "94523 ILLUMAGEAR ILLUMAGEAR’s mission is to illuminate people a... \n", "94524 Knowillage Knowillage lets you add personalization to you... \n", "94525 Iris Holidays Iris Holidays is a full service Kerala tours o... \n", "\n", " stage industry_name url \\\n", "0 0.0 Software & Data https://www.digipal.agency/ \n", "1 0.0 Social & Leisure https://beatbind.io/ \n", "2 0.0 Edtech https://smartacademy.ge/ \n", "3 0.0 Software & Data https://www.maxinai.com/#all-industries \n", "4 0.0 Edtech https://www.tlancer.ge/ \n", "... ... ... ... \n", "94521 0.0 Software & Data www.nethr \n", "94522 0.0 Software & Data http://www.trialfire.com \n", "94523 0.0 Software & Data http://www.illumagear.com \n", "94524 0.0 Edtech http://www.knowillage.com \n", "94525 0.0 Software & Data http://www.irisholidays.com \n", "\n", " country_slug city_slug location region_name \\\n", "0 georgia tbilisi Tbilisi, Georgia Europe \n", "1 georgia tbilisi Tbilisi, Georgia Europe \n", "2 georgia tbilisi Tbilisi, Georgia Europe \n", "3 georgia tbilisi Tbilisi, Georgia Europe \n", "4 georgia tbilisi Tbilisi, Georgia Europe \n", "... ... ... ... ... 
\n", "94521 croatia zagreb Zagreb, Croatia Europe \n", "94522 canada toronto Toronto, Canada North America \n", "94523 united-states seattle Seattle, United States North America \n", "94524 canada vancouver Vancouver, Canada North America \n", "94525 india kochi Kochi, India Asia Pacific \n", "\n", " text_vector_ \n", "0 [0.017287444323301315, 0.06208805367350578, -0... \n", "1 [-0.00438214186578989, -0.051213208585977554, ... \n", "2 [0.0005468669114634395, -0.05331585183739662, ... \n", "3 [0.021948501467704773, 0.024166792631149292, -... \n", "4 [0.02025573141872883, -0.022812215611338615, -... \n", "... ... \n", "94521 [0.07235302031040192, -0.05674564838409424, -0... \n", "94522 [0.030764097347855568, 0.054082825779914856, -... \n", "94523 [0.015447210520505905, -0.0984775498509407, 0.... \n", "94524 [0.007970919832587242, -0.04347420111298561, -... \n", "94525 [0.0032976483926177025, -0.010843133553862572,... \n", "\n", "[94526 rows x 10 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [
    "# Numeric stage code -> label; float codes such as 0.0 hash and compare equal to these int keys.\n",
    "stage_dict = {\n",
    "    0: 'pre-seed',\n",
    "    1: 'seed',\n",
    "    2: 'A',\n",
    "    3: 'B',\n",
    "    4: 'C',\n",
    "    5: 'Exit',\n",
    "}\n",
    "\n",
    "# Columns kept from the raw frame, mapped to the names the cell ends up with.\n",
    "# The raw frame has more columns than are kept, so they are selected by name:\n",
    "# assigning a shorter (and nested) list to `.columns` raises a length mismatch.\n",
    "df2_column_names = {\n",
    "    'title': 'name',\n",
    "    'description': 'description',\n",
    "    'stage': 'stage',\n",
    "    'industry_name': 'tags',\n",
    "    'url': 'url',\n",
    "    'country_slug': 'country_slug',\n",
    "    'text_vector_': 'text_vector_',\n",
    "}\n",
    "\n",
    "df2 = (\n",
    "    pd.read_parquet('df_encoded2.parquet')[list(df2_column_names)]\n",
    "    .rename(columns=df2_column_names)\n",
    "    .assign(\n",
    "        # Unknown codes still raise KeyError, as the original lookup did.\n",
    "        stage=lambda frame: frame['stage'].apply(lambda code: stage_dict[code]),\n",
    "        # Constant placeholders for fields this dataset does not provide.\n",
    "        raised='Undisclosed',\n",
    "        size='11-500+',\n",
    "        source='https://www.startupblink.com',\n",
    "    )\n",
    ")"
   ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "import os\n", "os.system('pip install openpyxl')\n", "os.system('pip install sentence-transformers')\n", "import pandas as pd\n", "import gradio as gr\n", "from sentence_transformers import SentenceTransformer\n", "\n", "model = 
SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
    "\n",
    "df = pd.read_parquet('df_encoded.parquet')\n",
    "# Store every tags value in its string form.\n",
    "df['tags'] = df['tags'].apply(str)\n",
    "\n",
    "def parse_raised(x):\n",
    "    \"\"\"Convert a funding label such as '$16.1M' into millions of dollars.\n",
    "\n",
    "    Args:\n",
    "        x: 'Undisclosed', a missing value, or a currency string with an\n",
    "            optional K / M / B suffix (thousands, millions, billions).\n",
    "\n",
    "    Returns:\n",
    "        0 for undisclosed or missing amounts, otherwise the amount as a\n",
    "        float expressed in millions.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if the suffix is unknown or the amount is not numeric.\n",
    "    \"\"\"\n",
    "    # `x != x` is only true for NaN, i.e. a missing value in the column.\n",
    "    if x is None or (isinstance(x, float) and x != x) or x == 'Undisclosed':\n",
    "        return 0\n",
    "    text = str(x).strip().lstrip('$€£').replace(',', '')\n",
    "    if not text:\n",
    "        return 0\n",
    "    quantifier = text[-1].upper()\n",
    "    if quantifier.isdigit():\n",
    "        # No suffix at all: the label is a plain amount in dollars.\n",
    "        return float(text) / 1_000_000\n",
    "    amount = float(text[:-1])\n",
    "    if quantifier == 'K':\n",
    "        return amount / 1000\n",
    "    if quantifier == 'M':\n",
    "        return amount\n",
    "    if quantifier == 'B':\n",
    "        return amount * 1000\n",
    "    # Previously an unknown suffix fell through and returned None silently.\n",
    "    raise ValueError(f'Unrecognised amount suffix in {x!r}')\n",
    "\n",
    "df['raised'] = df['raised'].apply(parse_raised)\n",
    "df = df.reset_index(drop=True)\n",
    "\n",
    "from sklearn.neighbors import NearestNeighbors\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sentence_transformers import SentenceTransformer"
   ] }, { "cell_type": "code", "execution_count": 92, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n", " warnings.warn(\n", "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Slider, please remove them: {'step_size': 1}\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7896\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "a\n" ] } ], "source": [
    "def filter_df(df, column_name, filter_type, filter_value):\n",
    "    \"\"\"Return the rows of `df` whose `column_name` passes the comparison.\n",
    "\n",
    "    Args:\n",
    "        df: frame to filter.\n",
    "        column_name: column the comparison is applied to.\n",
    "        filter_type: '==', '>=', '<=' or 'contains' (pattern match on strings).\n",
    "        filter_value: value, or pattern for 'contains', to compare against.\n",
    "\n",
    "    Raises:\n",
    "        ValueError: if `filter_type` is not one of the supported operators.\n",
    "    \"\"\"\n",
    "    column = df[column_name]\n",
    "    if filter_type == '==':\n",
    "        mask = column == filter_value\n",
    "    elif filter_type == '>=':\n",
    "        mask = column >= filter_value\n",
    "    elif filter_type == '<=':\n",
    "        mask = column <= filter_value\n",
    "    elif filter_type == 'contains':\n",
    "        # Uses the requested column (this branch used to be hard-wired to 'target');\n",
    "        # na=False drops rows with a missing value instead of breaking the mask.\n",
    "        # Substring semantics: 'B2B' also matches 'B2B2C'.\n",
    "        mask = column.str.contains(filter_value, na=False)\n",
    "    else:\n",
    "        raise ValueError(f'Unsupported filter_type: {filter_type!r}')\n",
    "    return df[mask]\n",
    "\n",
    "def search(df, query):\n",
    "    \"\"\"Return up to 20 rows of `df` whose `text_vector_` is nearest to the encoded query.\n",
    "\n",
    "    Args:\n",
    "        df: candidate rows; must hold a `text_vector_` column of embeddings.\n",
    "        query: free text, encoded with the notebook-level `model`.\n",
    "\n",
    "    Returns:\n",
    "        The matching rows restricted to the display columns, nearest first.\n",
    "        An empty frame is returned when `df` has no rows.\n",
    "    \"\"\"\n",
    "    result_columns = ['name', 'description', 'raised', 'year', 'target', 'size', 'stage', 'tags']\n",
    "    if df.empty:\n",
    "        # Nothing to fit the index on; hand back an empty result instead of raising.\n",
    "        return df[result_columns]\n",
    "\n",
    "    product = model.encode(query).tolist()\n",
    "\n",
    "    # kneighbors() rejects n_neighbors larger than the number of fitted rows.\n",
    "    n_neighbors = min(20, len(df))\n",
    "    nbrs = NearestNeighbors(n_neighbors=n_neighbors, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
    "\n",
    "    # A single query vector goes in, so only the first row of `indices` is used.\n",
    "    distances, indices = nbrs.kneighbors([product])\n",
    "    return df.iloc[indices[0]][result_columns]\n",
    "\n",
    "def greet(size, target, raised, query):\n",
    "    \"\"\"Gradio callback: filter the startups, then rank them against the query.\n",
    "\n",
    "    Args:\n",
    "        size: exact value of the `size` column to keep.\n",
    "        target: text the `target` column must contain.\n",
    "        raised: minimum amount raised, in millions.\n",
    "        query: free-text description of the startup being searched for.\n",
    "\n",
    "    Returns:\n",
    "        The nearest matches sorted by amount raised (descending), with a\n",
    "        raised value of 0 shown as 'Undisclosed'.\n",
    "    \"\"\"\n",
    "    df_size = filter_df(df, 'size', '==', size)\n",
    "    df_target = filter_df(df_size, 'target', 'contains', target)\n",
    "    # 0 stands for an undisclosed amount, so those rows pass whatever the threshold is.\n",
    "    df_raised = df_target[(df_target['raised'] >= raised) | (df_target['raised'] == 0)]\n",
    "    df_knn = search(df_raised, query)\n",
    "    # We leave the sorting for last, while the column is still numeric.\n",
    "    df_knn = df_knn.sort_values('raised', ascending=False)\n",
    "    df_knn['raised'] = df_knn['raised'].apply(lambda x: 'Undisclosed' if x == 0 else x)\n",
    "\n",
    "    return df_knn\n",
    "\n",
    "with 
gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
    "    gr.Markdown(\n",
    "        \"\"\"\n",
    "        # Gradio with History\n",
    "        \"\"\"\n",
    "    )\n",
    "    # Inputs are declared in the order greet() expects them.\n",
    "    # Radio is single-choice and has no `multiselect` keyword, so it is not passed.\n",
    "    size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+'], value='11-50', label='size')\n",
    "    target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], value='B2B', label='target')\n",
    "    # The increment keyword is `step`; `step_size` was reported as an unused kwarg.\n",
    "    raised = gr.Slider(0, 20, value=5, step=1, label=\"Minimum raising (in Millions)\")\n",
    "    query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')\n",
    "    btn = gr.Button(value=\"Search for a Startup\")\n",
    "    output1 = gr.DataFrame(label='value')\n",
    "    btn.click(greet, [size, target, raised, query], [output1])\n",
    "demo.launch(share=False)"
   ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }