{ "cells": [ { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['Pre-Funding', 'Seed', 'A', 'Mature', 'C', 'Public', 'D',\n", " 'Pre-Seed', 'B', 'Debt Financing', 'F', 'Crowdfunding', 'E'],\n", " dtype=object)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Distinct funding-stage labels in df1 (df1 is loaded by a later cell of this notebook).\n",
 "df1['stage'].unique()"
 ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0., 3., 1., 4., 2., 5.])" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Distinct stage values in df2 (df2 is built by a later cell of this notebook).\n",
 "df2['stage'].unique()"
 ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namedescriptionyeartargetsizestageraisedtagscountrysourcetext_vector_
00.10 of a SecondSmart Indicators for Connected Vehicles2019.0B2B1-10Pre-FundingUndisclosed[connected-vehicles, adas, autonomous-vehicles...Israelhttps://finder.startupnationcentral.org/[-0.031224824488162994, -0.06342269480228424, ...
112trixMath Lessons for Young Kids2012.0B2B, B2C1-10Pre-FundingUndisclosed[sdg, schools, pre-k, serious-games, games, mo...Israelhttps://finder.startupnationcentral.org/[-0.038649097084999084, 0.028091922402381897, ...
21E TherapeuticsNovel RNA-targeting Drugs2021.0B2B51-200Seed$120M[pharmaceuticals, chronic-disease, immunology,...Israelhttps://finder.startupnationcentral.org/[0.04561534896492958, -0.017776092514395714, 0...
31MRoboticsRetail Automation Solutions with Nano Fulfillment2021.0B2B11-50A$25M[omni-channel, ecommerce, climate-tech, artifi...Israelhttps://finder.startupnationcentral.org/[0.0024080690927803516, -0.03042100928723812, ...
41touch.ioPersonal Data Flow Tracking and Data Cataloging2017.0B2B51-200A$16.1M[enterprise-solutions, data-protection, cyber-...Israelhttps://finder.startupnationcentral.org/[-0.01007091999053955, 0.10431888699531555, -0...
....................................
4981YOW HRHuman Resources Engagement and Optimization Pl...2020.0B2B, B2B2C1-10Pre-FundingUndisclosed[content-creators, e-learning, software-applic...Israelhttps://finder.startupnationcentral.org/[0.026961881667375565, 0.002459645736962557, -...
4982Yummi Home FoodMarketplace for Homemade Food2012.0B2C11-50Pre-FundingUndisclosed[ecommerce, p2p, delivery, online-shopping, ma...Israelhttps://finder.startupnationcentral.org/[0.0036857957020401955, 0.03582162782549858, -...
4983Yung-EtgarCustom Mechanized Harvesting Systems1982.0B2B51-200MatureUndisclosed[crops, agtech, harvesting, machinery, sdg, cl...Israelhttps://finder.startupnationcentral.org/[0.027293115854263306, 0.010461761616170406, 0...
4984YuviTalDigital Health and Fitness Solutions for Organ...2017.0B2B, B2C, B2G11-50Pre-FundingUndisclosed[fitness, digital-wallet, discount, mobile-app...Israelhttps://finder.startupnationcentral.org/[0.02851911261677742, 0.05474231392145157, -0....
4985Z-squareMicroendoscope for Minimally Invasive Imaging ...2013.0B2B11-50Seed$10M[endoscopy, medical-devices, minimally-invasiv...Israelhttps://finder.startupnationcentral.org/[0.012587728910148144, -0.07959864288568497, -...
\n", "

4986 rows × 11 columns

\n", "
" ], "text/plain": [ " name description \\\n", "0 0.10 of a Second Smart Indicators for Connected Vehicles \n", "1 12trix Math Lessons for Young Kids \n", "2 1E Therapeutics Novel RNA-targeting Drugs \n", "3 1MRobotics Retail Automation Solutions with Nano Fulfillment \n", "4 1touch.io Personal Data Flow Tracking and Data Cataloging \n", "... ... ... \n", "4981 YOW HR Human Resources Engagement and Optimization Pl... \n", "4982 Yummi Home Food Marketplace for Homemade Food \n", "4983 Yung-Etgar Custom Mechanized Harvesting Systems \n", "4984 YuviTal Digital Health and Fitness Solutions for Organ... \n", "4985 Z-square Microendoscope for Minimally Invasive Imaging ... \n", "\n", " year target size stage raised \\\n", "0 2019.0 B2B 1-10 Pre-Funding Undisclosed \n", "1 2012.0 B2B, B2C 1-10 Pre-Funding Undisclosed \n", "2 2021.0 B2B 51-200 Seed $120M \n", "3 2021.0 B2B 11-50 A $25M \n", "4 2017.0 B2B 51-200 A $16.1M \n", "... ... ... ... ... ... \n", "4981 2020.0 B2B, B2B2C 1-10 Pre-Funding Undisclosed \n", "4982 2012.0 B2C 11-50 Pre-Funding Undisclosed \n", "4983 1982.0 B2B 51-200 Mature Undisclosed \n", "4984 2017.0 B2B, B2C, B2G 11-50 Pre-Funding Undisclosed \n", "4985 2013.0 B2B 11-50 Seed $10M \n", "\n", " tags country \\\n", "0 [connected-vehicles, adas, autonomous-vehicles... Israel \n", "1 [sdg, schools, pre-k, serious-games, games, mo... Israel \n", "2 [pharmaceuticals, chronic-disease, immunology,... Israel \n", "3 [omni-channel, ecommerce, climate-tech, artifi... Israel \n", "4 [enterprise-solutions, data-protection, cyber-... Israel \n", "... ... ... \n", "4981 [content-creators, e-learning, software-applic... Israel \n", "4982 [ecommerce, p2p, delivery, online-shopping, ma... Israel \n", "4983 [crops, agtech, harvesting, machinery, sdg, cl... Israel \n", "4984 [fitness, digital-wallet, discount, mobile-app... Israel \n", "4985 [endoscopy, medical-devices, minimally-invasiv... 
Israel \n", "\n", " source \\\n", "0 https://finder.startupnationcentral.org/ \n", "1 https://finder.startupnationcentral.org/ \n", "2 https://finder.startupnationcentral.org/ \n", "3 https://finder.startupnationcentral.org/ \n", "4 https://finder.startupnationcentral.org/ \n", "... ... \n", "4981 https://finder.startupnationcentral.org/ \n", "4982 https://finder.startupnationcentral.org/ \n", "4983 https://finder.startupnationcentral.org/ \n", "4984 https://finder.startupnationcentral.org/ \n", "4985 https://finder.startupnationcentral.org/ \n", "\n", " text_vector_ \n", "0 [-0.031224824488162994, -0.06342269480228424, ... \n", "1 [-0.038649097084999084, 0.028091922402381897, ... \n", "2 [0.04561534896492958, -0.017776092514395714, 0... \n", "3 [0.0024080690927803516, -0.03042100928723812, ... \n", "4 [-0.01007091999053955, 0.10431888699531555, -0... \n", "... ... \n", "4981 [0.026961881667375565, 0.002459645736962557, -... \n", "4982 [0.0036857957020401955, 0.03582162782549858, -... \n", "4983 [0.027293115854263306, 0.010461761616170406, 0... \n", "4984 [0.02851911261677742, 0.05474231392145157, -0.... \n", "4985 [0.012587728910148144, -0.07959864288568497, -... \n", "\n", "[4986 rows x 11 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "import pandas as pd\n",
 "\n",
 "# First dataset: one row per startup, with a precomputed `text_vector_` column.\n",
 "df1 = pd.read_parquet(path='df_encoded.parquet')\n",
 "df1"
 ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namedescriptionstagetagsurlcountrytext_vector_raisedsizesourcetarget
0DigipalDigipal is a digital consultancy based in Tbil...pre-seed[software, data]https://www.digipal.agency/georgia[0.017287444323301315, 0.06208805367350578, -0...Undisclosed11-500+https://www.startupblink.comUndisclosed
1BeatBindBeatBind is the industry's long overdue platfo...pre-seed[social, leisure]https://beatbind.io/georgia[-0.00438214186578989, -0.051213208585977554, ...Undisclosed11-500+https://www.startupblink.comUndisclosed
2Smart AcademySmart Academy is a modern educational institut...pre-seed[edtech]https://smartacademy.ge/georgia[0.0005468669114634395, -0.05331585183739662, ...Undisclosed11-500+https://www.startupblink.comUndisclosed
3MaxinAIMaxinAI isglobal AI development company that w...pre-seed[software, data]https://www.maxinai.com/#all-industriesgeorgia[0.021948501467704773, 0.024166792631149292, -...Undisclosed11-500+https://www.startupblink.comUndisclosed
4TLANCERTlancer aims to create an unlimited educationa...pre-seed[edtech]https://www.tlancer.ge/georgia[0.02025573141872883, -0.022812215611338615, -...Undisclosed11-500+https://www.startupblink.comUndisclosed
....................................
94521OneTwoklkdčksč kdč skdčlsk čdksčd ksčk dčskdčk čdkpre-seed[software, data]www.nethrcroatia[0.07235302031040192, -0.05674564838409424, -0...Undisclosed11-500+https://www.startupblink.comUndisclosed
94522TrialfireEngaged trialers turn into customers, engaged ...pre-seed[software, data]http://www.trialfire.comcanada[0.030764097347855568, 0.054082825779914856, -...Undisclosed11-500+https://www.startupblink.comUndisclosed
94523ILLUMAGEARILLUMAGEAR’s mission is to illuminate people a...pre-seed[software, data]http://www.illumagear.comunited-states[0.015447210520505905, -0.0984775498509407, 0....Undisclosed11-500+https://www.startupblink.comUndisclosed
94524KnowillageKnowillage lets you add personalization to you...pre-seed[edtech]http://www.knowillage.comcanada[0.007970919832587242, -0.04347420111298561, -...Undisclosed11-500+https://www.startupblink.comUndisclosed
94525Iris HolidaysIris Holidays is a full service Kerala tours o...pre-seed[software, data]http://www.irisholidays.comindia[0.0032976483926177025, -0.010843133553862572,...Undisclosed11-500+https://www.startupblink.comUndisclosed
\n", "

94526 rows × 11 columns

\n", "
" ], "text/plain": [ " name description \\\n", "0 Digipal Digipal is a digital consultancy based in Tbil... \n", "1 BeatBind BeatBind is the industry's long overdue platfo... \n", "2 Smart Academy Smart Academy is a modern educational institut... \n", "3 MaxinAI MaxinAI isglobal AI development company that w... \n", "4 TLANCER Tlancer aims to create an unlimited educationa... \n", "... ... ... \n", "94521 OneTwo klkdčksč kdč skdčlsk čdksčd ksčk dčskdčk čdk \n", "94522 Trialfire Engaged trialers turn into customers, engaged ... \n", "94523 ILLUMAGEAR ILLUMAGEAR’s mission is to illuminate people a... \n", "94524 Knowillage Knowillage lets you add personalization to you... \n", "94525 Iris Holidays Iris Holidays is a full service Kerala tours o... \n", "\n", " stage tags url \\\n", "0 pre-seed [software, data] https://www.digipal.agency/ \n", "1 pre-seed [social, leisure] https://beatbind.io/ \n", "2 pre-seed [edtech] https://smartacademy.ge/ \n", "3 pre-seed [software, data] https://www.maxinai.com/#all-industries \n", "4 pre-seed [edtech] https://www.tlancer.ge/ \n", "... ... ... ... \n", "94521 pre-seed [software, data] www.nethr \n", "94522 pre-seed [software, data] http://www.trialfire.com \n", "94523 pre-seed [software, data] http://www.illumagear.com \n", "94524 pre-seed [edtech] http://www.knowillage.com \n", "94525 pre-seed [software, data] http://www.irisholidays.com \n", "\n", " country text_vector_ \\\n", "0 georgia [0.017287444323301315, 0.06208805367350578, -0... \n", "1 georgia [-0.00438214186578989, -0.051213208585977554, ... \n", "2 georgia [0.0005468669114634395, -0.05331585183739662, ... \n", "3 georgia [0.021948501467704773, 0.024166792631149292, -... \n", "4 georgia [0.02025573141872883, -0.022812215611338615, -... \n", "... ... ... \n", "94521 croatia [0.07235302031040192, -0.05674564838409424, -0... \n", "94522 canada [0.030764097347855568, 0.054082825779914856, -... \n", "94523 united-states [0.015447210520505905, -0.0984775498509407, 0.... 
\n", "94524 canada [0.007970919832587242, -0.04347420111298561, -... \n", "94525 india [0.0032976483926177025, -0.010843133553862572,... \n", "\n", " raised size source target \n", "0 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "1 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "2 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "3 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "4 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "... ... ... ... ... \n", "94521 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "94522 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "94523 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "94524 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "94525 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n", "\n", "[94526 rows x 11 columns]" ] }, "execution_count": 91, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "# Numeric stage codes of the second dataset -> display labels.\n",
 "# NOTE: df1 spells its labels 'Pre-Seed' / 'Seed', so the lower-case labels\n",
 "# below remain distinct values once the two frames are concatenated.\n",
 "stage_dict = {\n",
 "    0: 'pre-seed',\n",
 "    1: 'seed',\n",
 "    2: 'A',\n",
 "    3: 'B',\n",
 "    4: 'C',\n",
 "    5: 'Exit',\n",
 "}\n",
 "\n",
 "# Build df2 in one chain so every step works on a fresh frame (no assignment\n",
 "# into a slice), and rename columns by name so a change in column order can\n",
 "# never silently mislabel the data.\n",
 "df2 = (\n",
 "    pd.read_parquet('df_encoded2.parquet')\n",
 "    .loc[:, ['title', 'description', 'stage', 'industry_name', 'url', 'country_slug', 'text_vector_']]\n",
 "    .rename(columns={'title': 'name', 'industry_name': 'tags', 'country_slug': 'country'})\n",
 "    .assign(\n",
 "        # An unknown stage code still raises KeyError, as before.\n",
 "        stage=lambda d: d['stage'].map(lambda code: stage_dict[code]),\n",
 "        # 'Software & Data' -> ['software', 'data']\n",
 "        tags=lambda d: d['tags'].map(lambda text: [part.strip().lower() for part in text.split('&')]),\n",
 "        # Constant placeholders for fields this source does not provide.\n",
 "        raised='Undisclosed',\n",
 "        size='11-500+',\n",
 "        source='https://www.startupblink.com',\n",
 "        target='Undisclosed',\n",
 "    )\n",
 ")\n",
 "df2"
 ] }, { "cell_type": "code", "execution_count": 94, "metadata": {}, "outputs": [], "source": [
 "# Stack both datasets; columns missing on one side (e.g. `url` for df1) become ''.\n",
 "df3 = (\n",
 "    pd.concat([df1, df2], ignore_index=True, axis=0)\n",
 "    .drop(columns=['year'])\n",
 "    .fillna('')\n",
 ")\n",
 "assert len(df3) == len(df1) + len(df2), 'concat must keep every row of both inputs'\n",
 "df3.to_parquet('df_encoded3.parquet')"
 ] }, { "cell_type": "code", "execution_count": 88, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namedescriptiontargetsizestageraisedtagscountrysourcetext_vector_url
00.10 of a SecondSmart Indicators for Connected VehiclesB2B1-10Pre-FundingUndisclosed[connected-vehicles, adas, autonomous-vehicles...Israelhttps://finder.startupnationcentral.org/[-0.031224824488162994, -0.06342269480228424, ...
112trixMath Lessons for Young KidsB2B, B2C1-10Pre-FundingUndisclosed[sdg, schools, pre-k, serious-games, games, mo...Israelhttps://finder.startupnationcentral.org/[-0.038649097084999084, 0.028091922402381897, ...
21E TherapeuticsNovel RNA-targeting DrugsB2B51-200Seed$120M[pharmaceuticals, chronic-disease, immunology,...Israelhttps://finder.startupnationcentral.org/[0.04561534896492958, -0.017776092514395714, 0...
31MRoboticsRetail Automation Solutions with Nano FulfillmentB2B11-50A$25M[omni-channel, ecommerce, climate-tech, artifi...Israelhttps://finder.startupnationcentral.org/[0.0024080690927803516, -0.03042100928723812, ...
41touch.ioPersonal Data Flow Tracking and Data CatalogingB2B51-200A$16.1M[enterprise-solutions, data-protection, cyber-...Israelhttps://finder.startupnationcentral.org/[-0.01007091999053955, 0.10431888699531555, -0...
524meNext-generation Personal AssistantB2C1-10SeedUndisclosed[time-management, scheduling, calendars, artif...Israelhttps://finder.startupnationcentral.org/[0.035849399864673615, 0.04990792274475098, -0...
6270SurgicalSpecialty Laparoscopic System for Wide Cavity ...B2B, B2C11-50Pre-FundingUndisclosed[endoscopy, surgery, operating-rooms, optics, ...Israelhttps://finder.startupnationcentral.org/[-0.00110541470348835, 0.011574415490031242, 0...
72bPrecisePrecision Medicine SolutionB2B51-200MatureUndisclosed[decision-making, predictive-analytics, cardio...Israelhttps://finder.startupnationcentral.org/[0.01863308809697628, 0.03877090662717819, -0....
82breathe TechnologiesSmart Device and Mobile App to Induce SleepB2C1-10Pre-FundingUndisclosed[monitoring, digital-healthcare, sleep-disorde...Israelhttps://finder.startupnationcentral.org/[-0.03323083370923996, -0.006272533442825079, ...
92TeaMSoftware Solutions for Financial CompaniesB2B, B2B2C11-50MatureUndisclosed[marketing, insurance-companies, bank-infrastr...Israelhttps://finder.startupnationcentral.org/[-0.0050485446117818356, 0.030337687581777573,...
10DigipalDigipal is a digital consultancy based in Tbil...georgiaUndisclosedpre-seed11-500+[software, data][0.017287444323301315, 0.06208805367350578, -0...https://www.startupblink.comUndisclosedhttps://www.digipal.agency/
11BeatBindBeatBind is the industry's long overdue platfo...georgiaUndisclosedpre-seed11-500+[social, leisure][-0.00438214186578989, -0.051213208585977554, ...https://www.startupblink.comUndisclosedhttps://beatbind.io/
12Smart AcademySmart Academy is a modern educational institut...georgiaUndisclosedpre-seed11-500+[edtech][0.0005468669114634395, -0.05331585183739662, ...https://www.startupblink.comUndisclosedhttps://smartacademy.ge/
13MaxinAIMaxinAI isglobal AI development company that w...georgiaUndisclosedpre-seed11-500+[software, data][0.021948501467704773, 0.024166792631149292, -...https://www.startupblink.comUndisclosedhttps://www.maxinai.com/#all-industries
14TLANCERTlancer aims to create an unlimited educationa...georgiaUndisclosedpre-seed11-500+[edtech][0.02025573141872883, -0.022812215611338615, -...https://www.startupblink.comUndisclosedhttps://www.tlancer.ge/
15MyCoins.geMyCoins.ge is the biggest Crypto exchange plat...georgiaUndisclosedpre-seed11-500+[fintech][0.0306679829955101, -0.010290002450346947, -0...https://www.startupblink.comUndisclosedhttps://www.mycoins.ge/index.php/main/welcome
16ATL TechATL Tech is a company that specialized in Info...azerbaijanUndisclosedpre-seed11-500+[software, data][0.014148630201816559, -0.01890609972178936, -...https://www.startupblink.comUndisclosedhttps://www.atltech.az/az
17zypl.aizypl.ai’s strategy is to become the leading AI...tajikistanUndisclosedpre-seed11-500+[software, data][0.001473484211601317, 0.008834785781800747, -...https://www.startupblink.comUndisclosedhttps://zypl.ai/
18botifibotifi is a tool for a quick start of sales on...uzbekistanUndisclosedpre-seed11-500+[software, data][0.017161941155791283, -0.015285761095583439, ...https://www.startupblink.comUndisclosedhttps://botifi.me/en/
19smartupsmartup develop software solutions for various...uzbekistanUndisclosedpre-seed11-500+[software, data][0.00023191649233922362, -0.005923444870859384...https://www.startupblink.comUndisclosedhttps://smartup.uz/en.html
\n", "
" ], "text/plain": [ " name description \\\n", "0 0.10 of a Second Smart Indicators for Connected Vehicles \n", "1 12trix Math Lessons for Young Kids \n", "2 1E Therapeutics Novel RNA-targeting Drugs \n", "3 1MRobotics Retail Automation Solutions with Nano Fulfillment \n", "4 1touch.io Personal Data Flow Tracking and Data Cataloging \n", "5 24me Next-generation Personal Assistant \n", "6 270Surgical Specialty Laparoscopic System for Wide Cavity ... \n", "7 2bPrecise Precision Medicine Solution \n", "8 2breathe Technologies Smart Device and Mobile App to Induce Sleep \n", "9 2TeaM Software Solutions for Financial Companies \n", "10 Digipal Digipal is a digital consultancy based in Tbil... \n", "11 BeatBind BeatBind is the industry's long overdue platfo... \n", "12 Smart Academy Smart Academy is a modern educational institut... \n", "13 MaxinAI MaxinAI isglobal AI development company that w... \n", "14 TLANCER Tlancer aims to create an unlimited educationa... \n", "15 MyCoins.ge MyCoins.ge is the biggest Crypto exchange plat... \n", "16 ATL Tech ATL Tech is a company that specialized in Info... \n", "17 zypl.ai zypl.ai’s strategy is to become the leading AI... \n", "18 botifi botifi is a tool for a quick start of sales on... \n", "19 smartup smartup develop software solutions for various... 
\n", "\n", " target size stage raised \\\n", "0 B2B 1-10 Pre-Funding Undisclosed \n", "1 B2B, B2C 1-10 Pre-Funding Undisclosed \n", "2 B2B 51-200 Seed $120M \n", "3 B2B 11-50 A $25M \n", "4 B2B 51-200 A $16.1M \n", "5 B2C 1-10 Seed Undisclosed \n", "6 B2B, B2C 11-50 Pre-Funding Undisclosed \n", "7 B2B 51-200 Mature Undisclosed \n", "8 B2C 1-10 Pre-Funding Undisclosed \n", "9 B2B, B2B2C 11-50 Mature Undisclosed \n", "10 georgia Undisclosed pre-seed 11-500+ \n", "11 georgia Undisclosed pre-seed 11-500+ \n", "12 georgia Undisclosed pre-seed 11-500+ \n", "13 georgia Undisclosed pre-seed 11-500+ \n", "14 georgia Undisclosed pre-seed 11-500+ \n", "15 georgia Undisclosed pre-seed 11-500+ \n", "16 azerbaijan Undisclosed pre-seed 11-500+ \n", "17 tajikistan Undisclosed pre-seed 11-500+ \n", "18 uzbekistan Undisclosed pre-seed 11-500+ \n", "19 uzbekistan Undisclosed pre-seed 11-500+ \n", "\n", " tags \\\n", "0 [connected-vehicles, adas, autonomous-vehicles... \n", "1 [sdg, schools, pre-k, serious-games, games, mo... \n", "2 [pharmaceuticals, chronic-disease, immunology,... \n", "3 [omni-channel, ecommerce, climate-tech, artifi... \n", "4 [enterprise-solutions, data-protection, cyber-... \n", "5 [time-management, scheduling, calendars, artif... \n", "6 [endoscopy, surgery, operating-rooms, optics, ... \n", "7 [decision-making, predictive-analytics, cardio... \n", "8 [monitoring, digital-healthcare, sleep-disorde... \n", "9 [marketing, insurance-companies, bank-infrastr... \n", "10 [software, data] \n", "11 [social, leisure] \n", "12 [edtech] \n", "13 [software, data] \n", "14 [edtech] \n", "15 [fintech] \n", "16 [software, data] \n", "17 [software, data] \n", "18 [software, data] \n", "19 [software, data] \n", "\n", " country \\\n", "0 Israel \n", "1 Israel \n", "2 Israel \n", "3 Israel \n", "4 Israel \n", "5 Israel \n", "6 Israel \n", "7 Israel \n", "8 Israel \n", "9 Israel \n", "10 [0.017287444323301315, 0.06208805367350578, -0... 
\n", "11 [-0.00438214186578989, -0.051213208585977554, ... \n", "12 [0.0005468669114634395, -0.05331585183739662, ... \n", "13 [0.021948501467704773, 0.024166792631149292, -... \n", "14 [0.02025573141872883, -0.022812215611338615, -... \n", "15 [0.0306679829955101, -0.010290002450346947, -0... \n", "16 [0.014148630201816559, -0.01890609972178936, -... \n", "17 [0.001473484211601317, 0.008834785781800747, -... \n", "18 [0.017161941155791283, -0.015285761095583439, ... \n", "19 [0.00023191649233922362, -0.005923444870859384... \n", "\n", " source \\\n", "0 https://finder.startupnationcentral.org/ \n", "1 https://finder.startupnationcentral.org/ \n", "2 https://finder.startupnationcentral.org/ \n", "3 https://finder.startupnationcentral.org/ \n", "4 https://finder.startupnationcentral.org/ \n", "5 https://finder.startupnationcentral.org/ \n", "6 https://finder.startupnationcentral.org/ \n", "7 https://finder.startupnationcentral.org/ \n", "8 https://finder.startupnationcentral.org/ \n", "9 https://finder.startupnationcentral.org/ \n", "10 https://www.startupblink.com \n", "11 https://www.startupblink.com \n", "12 https://www.startupblink.com \n", "13 https://www.startupblink.com \n", "14 https://www.startupblink.com \n", "15 https://www.startupblink.com \n", "16 https://www.startupblink.com \n", "17 https://www.startupblink.com \n", "18 https://www.startupblink.com \n", "19 https://www.startupblink.com \n", "\n", " text_vector_ \\\n", "0 [-0.031224824488162994, -0.06342269480228424, ... \n", "1 [-0.038649097084999084, 0.028091922402381897, ... \n", "2 [0.04561534896492958, -0.017776092514395714, 0... \n", "3 [0.0024080690927803516, -0.03042100928723812, ... \n", "4 [-0.01007091999053955, 0.10431888699531555, -0... \n", "5 [0.035849399864673615, 0.04990792274475098, -0... \n", "6 [-0.00110541470348835, 0.011574415490031242, 0... \n", "7 [0.01863308809697628, 0.03877090662717819, -0.... \n", "8 [-0.03323083370923996, -0.006272533442825079, ... 
\n", "9 [-0.0050485446117818356, 0.030337687581777573,... \n", "10 Undisclosed \n", "11 Undisclosed \n", "12 Undisclosed \n", "13 Undisclosed \n", "14 Undisclosed \n", "15 Undisclosed \n", "16 Undisclosed \n", "17 Undisclosed \n", "18 Undisclosed \n", "19 Undisclosed \n", "\n", " url \n", "0 \n", "1 \n", "2 \n", "3 \n", "4 \n", "5 \n", "6 \n", "7 \n", "8 \n", "9 \n", "10 https://www.digipal.agency/ \n", "11 https://beatbind.io/ \n", "12 https://smartacademy.ge/ \n", "13 https://www.maxinai.com/#all-industries \n", "14 https://www.tlancer.ge/ \n", "15 https://www.mycoins.ge/index.php/main/welcome \n", "16 https://www.atltech.az/az \n", "17 https://zypl.ai/ \n", "18 https://botifi.me/en/ \n", "19 https://smartup.uz/en.html " ] }, "execution_count": 88, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [
 "# Install into the kernel's own environment; a bare os.system('pip ...') can\n",
 "# target a different interpreter and hides installation failures.\n",
 "%pip install -q openpyxl sentence-transformers\n",
 "\n",
 "import os\n",
 "\n",
 "import gradio as gr\n",
 "import numpy as np\n",
 "import pandas as pd\n",
 "from sentence_transformers import SentenceTransformer\n",
 "from sklearn.neighbors import NearestNeighbors\n",
 "\n",
 "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
 "\n",
 "\n",
 "def parse_raised(x):\n",
 "    \"\"\"Convert a `raised` label into a number of millions of dollars.\n",
 "\n",
 "    '$16.1M' -> 16.1, '$500K' -> 0.5, '$1.2B' -> 1200.0, '$250000' -> 0.25.\n",
 "    'Undisclosed', missing values and unparseable labels give 0, which the\n",
 "    rest of the notebook treats as \"undisclosed\".\n",
 "    \"\"\"\n",
 "    if not isinstance(x, str):\n",
 "        return 0\n",
 "    text = x.strip().lstrip('$').replace(',', '')\n",
 "    if not text or text == 'Undisclosed':\n",
 "        return 0\n",
 "    suffix = text[-1].upper()\n",
 "    try:\n",
 "        if suffix == 'K':\n",
 "            return float(text[:-1]) / 1000\n",
 "        if suffix == 'M':\n",
 "            return float(text[:-1])\n",
 "        if suffix == 'B':\n",
 "            return float(text[:-1]) * 1000\n",
 "        # No magnitude suffix: a plain dollar amount.\n",
 "        return float(text) / 1_000_000\n",
 "    except ValueError:\n",
 "        return 0\n",
 "\n",
 "\n",
 "# Reloading from disk on every run keeps this cell idempotent.\n",
 "df = (\n",
 "    pd.read_parquet('df_encoded.parquet')\n",
 "    .assign(\n",
 "        tags=lambda d: d['tags'].map(str),\n",
 "        raised=lambda d: d['raised'].map(parse_raised),\n",
 "    )\n",
 "    .reset_index(drop=True)\n",
 ")"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [
 "def filter_df(df, column_name, filter_type, filter_value):\n",
 "    \"\"\"Return the rows of `df` whose `column_name` passes the requested comparison.\n",
 "\n",
 "    `filter_type` is one of '==', '>=', '<=' or 'contains'. 'contains' is a\n",
 "    literal substring test on a string column (so 'B2C' also matches 'B2B2C');\n",
 "    missing values never match.\n",
 "\n",
 "    Raises ValueError for any other `filter_type`.\n",
 "    \"\"\"\n",
 "    column = df[column_name]\n",
 "    if filter_type == '==':\n",
 "        mask = column == filter_value\n",
 "    elif filter_type == '>=':\n",
 "        mask = column >= filter_value\n",
 "    elif filter_type == '<=':\n",
 "        mask = column <= filter_value\n",
 "    elif filter_type == 'contains':\n",
 "        # Applies to the requested column; na=False keeps the mask strictly boolean.\n",
 "        mask = column.str.contains(filter_value, regex=False, na=False)\n",
 "    else:\n",
 "        raise ValueError(f'Unsupported filter_type: {filter_type!r}')\n",
 "    return df[mask]\n",
 "\n",
 "\n",
 "def search(df, query, n_neighbors=20):\n",
 "    \"\"\"Return the rows of `df` whose `text_vector_` is nearest to the encoded `query`.\n",
 "\n",
 "    At most `n_neighbors` rows come back: fewer when `df` is smaller, and an\n",
 "    empty frame when `df` has no rows.\n",
 "    \"\"\"\n",
 "    columns = ['name', 'description', 'raised', 'year', 'target', 'size', 'stage', 'tags']\n",
 "    k = min(n_neighbors, len(df))\n",
 "    if k == 0:\n",
 "        # NearestNeighbors cannot be fitted on zero samples.\n",
 "        return df.iloc[0:0][columns]\n",
 "\n",
 "    query_vector = model.encode(query).tolist()\n",
 "    nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
 "    _, indices = nbrs.kneighbors([query_vector])  # one query -> one row of indices\n",
 "    return df.iloc[indices[0]][columns]\n",
 "\n",
 "\n",
 "def greet(size, target, raised, query):\n",
 "    \"\"\"Gradio callback: filter the startups, run the semantic search, format the table.\n",
 "\n",
 "    Rows with raised == 0 (undisclosed) always pass the minimum-raised filter.\n",
 "    The selected neighbours are returned sorted by amount raised, largest first.\n",
 "    \"\"\"\n",
 "    df_size = filter_df(df, 'size', '==', size)\n",
 "    df_target = filter_df(df_size, 'target', 'contains', target)\n",
 "    df_raised = df_target[(df_target['raised'] >= raised) | (df_target['raised'] == 0)]\n",
 "    # Sorting is left for last so it only reorders the neighbours already selected.\n",
 "    df_knn = search(df_raised, query).sort_values('raised', ascending=False)\n",
 "    return df_knn.assign(raised=df_knn['raised'].map(lambda v: 'Undisclosed' if v == 0 else v))\n",
 "\n",
 "\n",
 "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
 "    gr.Markdown(\n",
 "        \"\"\"\n",
 "        # Gradio with History\n",
 "        \"\"\"\n",
 "    )\n",
 "    # Radio is single-choice by design; it takes no `multiselect` argument.\n",
 "    size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+'], value='11-50', label='size')\n",
 "    target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], value='B2B', label='target')\n",
 "    # The Slider keyword is `step`; `step_size` was ignored with a warning.\n",
 "    raised = gr.Slider(0, 20, value=5, step=1, label=\"Minimum raising (in Millions)\")\n",
 "    query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')\n",
 "    btn = gr.Button(value=\"Search for a Startup\")\n",
 "    output1 = gr.DataFrame(label='value')\n",
 "    btn.click(greet, [size, target, raised, query], [output1])\n",
 "demo.launch(share=False)"
 ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }