{ "cells": [ { "cell_type": "code", "execution_count": 78, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
namedescriptionyeartargetsizestageraisedtagstext_vector_
00.10 of a SecondSmart Indicators for Connected Vehicles2019.0B2B1-10Pre-FundingUndisclosed[connected-vehicles, adas, autonomous-vehicles...[-0.031224824488162994, -0.06342269480228424, ...
112trixMath Lessons for Young Kids2012.0B2B, B2C1-10Pre-FundingUndisclosed[sdg, schools, pre-k, serious-games, games, mo...[-0.038649097084999084, 0.028091922402381897, ...
21E TherapeuticsNovel RNA-targeting Drugs2021.0B2B51-200Seed$120M[pharmaceuticals, chronic-disease, immunology,...[0.04561534896492958, -0.017776092514395714, 0...
31MRoboticsRetail Automation Solutions with Nano Fulfillment2021.0B2B11-50A$25M[omni-channel, ecommerce, climate-tech, artifi...[0.0024080690927803516, -0.03042100928723812, ...
41touch.ioPersonal Data Flow Tracking and Data Cataloging2017.0B2B51-200A$16.1M[enterprise-solutions, data-protection, cyber-...[-0.01007091999053955, 0.10431888699531555, -0...
..............................
4981YOW HRHuman Resources Engagement and Optimization Pl...2020.0B2B, B2B2C1-10Pre-FundingUndisclosed[content-creators, e-learning, software-applic...[0.026961881667375565, 0.002459645736962557, -...
4982Yummi Home FoodMarketplace for Homemade Food2012.0B2C11-50Pre-FundingUndisclosed[ecommerce, p2p, delivery, online-shopping, ma...[0.0036857957020401955, 0.03582162782549858, -...
4983Yung-EtgarCustom Mechanized Harvesting Systems1982.0B2B51-200MatureUndisclosed[crops, agtech, harvesting, machinery, sdg, cl...[0.027293115854263306, 0.010461761616170406, 0...
4984YuviTalDigital Health and Fitness Solutions for Organ...2017.0B2B, B2C, B2G11-50Pre-FundingUndisclosed[fitness, digital-wallet, discount, mobile-app...[0.02851911261677742, 0.05474231392145157, -0....
4985Z-squareMicroendoscope for Minimally Invasive Imaging ...2013.0B2B11-50Seed$10M[endoscopy, medical-devices, minimally-invasiv...[0.012587728910148144, -0.07959864288568497, -...
\n", "

4986 rows × 9 columns

\n", "
" ], "text/plain": [ " name description \\\n", "0 0.10 of a Second Smart Indicators for Connected Vehicles \n", "1 12trix Math Lessons for Young Kids \n", "2 1E Therapeutics Novel RNA-targeting Drugs \n", "3 1MRobotics Retail Automation Solutions with Nano Fulfillment \n", "4 1touch.io Personal Data Flow Tracking and Data Cataloging \n", "... ... ... \n", "4981 YOW HR Human Resources Engagement and Optimization Pl... \n", "4982 Yummi Home Food Marketplace for Homemade Food \n", "4983 Yung-Etgar Custom Mechanized Harvesting Systems \n", "4984 YuviTal Digital Health and Fitness Solutions for Organ... \n", "4985 Z-square Microendoscope for Minimally Invasive Imaging ... \n", "\n", " year target size stage raised \\\n", "0 2019.0 B2B 1-10 Pre-Funding Undisclosed \n", "1 2012.0 B2B, B2C 1-10 Pre-Funding Undisclosed \n", "2 2021.0 B2B 51-200 Seed $120M \n", "3 2021.0 B2B 11-50 A $25M \n", "4 2017.0 B2B 51-200 A $16.1M \n", "... ... ... ... ... ... \n", "4981 2020.0 B2B, B2B2C 1-10 Pre-Funding Undisclosed \n", "4982 2012.0 B2C 11-50 Pre-Funding Undisclosed \n", "4983 1982.0 B2B 51-200 Mature Undisclosed \n", "4984 2017.0 B2B, B2C, B2G 11-50 Pre-Funding Undisclosed \n", "4985 2013.0 B2B 11-50 Seed $10M \n", "\n", " tags \\\n", "0 [connected-vehicles, adas, autonomous-vehicles... \n", "1 [sdg, schools, pre-k, serious-games, games, mo... \n", "2 [pharmaceuticals, chronic-disease, immunology,... \n", "3 [omni-channel, ecommerce, climate-tech, artifi... \n", "4 [enterprise-solutions, data-protection, cyber-... \n", "... ... \n", "4981 [content-creators, e-learning, software-applic... \n", "4982 [ecommerce, p2p, delivery, online-shopping, ma... \n", "4983 [crops, agtech, harvesting, machinery, sdg, cl... \n", "4984 [fitness, digital-wallet, discount, mobile-app... \n", "4985 [endoscopy, medical-devices, minimally-invasiv... \n", "\n", " text_vector_ \n", "0 [-0.031224824488162994, -0.06342269480228424, ... \n", "1 [-0.038649097084999084, 0.028091922402381897, ... 
# %% Cell 1 - quick look at the pre-encoded startup table
import pandas as pd

df = pd.read_parquet('df_encoded.parquet')
df.head()  # preview only instead of rendering the whole frame

# %% Cell 2 - runtime dependencies, embedding model and cleaned data
import os
import re
import subprocess
import sys


def _pip_install(package):
    """Install `package` into the environment of the running interpreter.

    `sys.executable -m pip` is used instead of a bare `pip` shell command so
    the package lands in the same environment the kernel imports from.
    A failed install is reported on stderr but does not raise; the import
    below will fail with a clear error if the package is really missing.
    """
    result = subprocess.run(
        [sys.executable, '-m', 'pip', 'install', package],
        check=False,
    )
    if result.returncode != 0:
        print(
            f'pip install {package} failed (exit code {result.returncode})',
            file=sys.stderr,
        )


# Must run before the third-party imports below, which may rely on these packages.
for _package in ('openpyxl', 'sentence-transformers'):
    _pip_install(_package)

import gradio as gr
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer
from sklearn.neighbors import NearestNeighbors

MODEL_NAME = 'all-mpnet-base-v2'  # alternative: 'all-MiniLM-L6-v2'
DATA_PATH = 'df_encoded.parquet'

# NOTE(review): presumably this has to be the same model that produced the
# `text_vector_` column of the parquet file - confirm before switching models.
model = SentenceTransformer(MODEL_NAME)

# Optional currency/prefix characters, a number (thousands separators allowed),
# and an optional K / M / B magnitude suffix.
_RAISED_PATTERN = re.compile(r'[^\d.]*([\d,]+(?:\.\d+)?|\.\d+)\s*([KkMmBb]?)')


def parse_raised(x):
    """Convert a funding label such as '$16.1M' into millions of dollars.

    Args:
        x: Raw value of the `raised` column, e.g. 'Undisclosed', '$500K',
            '$25M' or '$1.2B'.

    Returns:
        The amount as a float expressed in millions:
        'K' values are divided by 1000, 'M' values are returned as-is,
        'B' values are multiplied by 1000 and suffix-less values are treated
        as plain dollars. 'Undisclosed', non-string input and anything that
        cannot be parsed yield 0.0, the sentinel the search UI displays as
        'Undisclosed'.
    """
    if not isinstance(x, str):
        return 0.0

    text = x.strip()
    if not text or text == 'Undisclosed':
        return 0.0

    match = _RAISED_PATTERN.fullmatch(text)
    if match is None:
        return 0.0

    try:
        amount = float(match.group(1).replace(',', ''))
    except ValueError:
        return 0.0

    quantifier = match.group(2).upper()
    if quantifier == 'K':
        return amount / 1000
    if quantifier == 'M':
        return amount
    if quantifier == 'B':
        return amount * 1000
    # No magnitude suffix: the figure is in dollars.
    return amount / 1_000_000


df = pd.read_parquet(DATA_PATH)
df['tags'] = df['tags'].apply(str)  # list-like tags -> printable string
df['raised'] = df['raised'].apply(parse_raised)
df = df.reset_index(drop=True)
# %% Cell 3 - filtering, semantic search and the Gradio front end

# Columns shown in the results table, in display order.
RESULT_COLUMNS = ['name', 'description', 'raised', 'year', 'target', 'size', 'stage', 'tags']


def filter_df(df, column_name, filter_type, filter_value):
    """Return the rows of `df` whose `column_name` satisfies a simple predicate.

    Args:
        df: DataFrame to filter (not modified).
        column_name: Column the predicate is applied to.
        filter_type: One of '==', '>=', '<=' or 'contains'.
        filter_value: Right-hand side of the comparison. For 'contains' it is
            passed to `Series.str.contains`, so it is matched as a substring
            pattern; missing values in the column never match.

    Returns:
        The filtered DataFrame.

    Raises:
        ValueError: If `filter_type` is not one of the supported operators.
    """
    column = df[column_name]
    if filter_type == '==':
        mask = column == filter_value
    elif filter_type == '>=':
        mask = column >= filter_value
    elif filter_type == '<=':
        mask = column <= filter_value
    elif filter_type == 'contains':
        # na=False keeps the mask strictly boolean when the column has gaps.
        mask = column.str.contains(filter_value, na=False)
    else:
        raise ValueError(f'Unsupported filter_type: {filter_type!r}')
    return df[mask]


def search(df, query, n_neighbors=20):
    """Return the rows of `df` whose embeddings are closest to `query`.

    Args:
        df: Candidate rows; must contain a `text_vector_` column of embeddings
            plus the columns listed in `RESULT_COLUMNS`.
        query: Free-text description, encoded with the module-level `model`.
        n_neighbors: Maximum number of rows to return (default 20).

    Returns:
        A DataFrame restricted to `RESULT_COLUMNS`, ordered from nearest to
        farthest. It has fewer than `n_neighbors` rows when `df` is smaller,
        and is empty when `df` is empty.
    """
    if df.empty:
        # Nothing to index; NearestNeighbors cannot be fitted on zero samples.
        return df[RESULT_COLUMNS].copy()

    query_vector = model.encode(query).tolist()
    vectors = df['text_vector_'].values.tolist()

    # kneighbors() raises when asked for more neighbours than fitted samples.
    k = min(n_neighbors, len(vectors))
    nbrs = NearestNeighbors(n_neighbors=k, algorithm='ball_tree').fit(vectors)

    _, indices = nbrs.kneighbors([query_vector])
    return df.iloc[indices[0]][RESULT_COLUMNS]


def _format_raised(amount):
    """Render the 0 sentinel as 'Undisclosed'; leave real amounts untouched."""
    return 'Undisclosed' if amount == 0 else amount


def greet(size, target, raised, query):
    """Gradio callback: filter the startup table and run the semantic search.

    Args:
        size: Company-size bucket; matched exactly against the `size` column.
        target: Market label that must appear in the `target` column.
        raised: Minimum funding in millions. Rows whose funding is 0
            (undisclosed) are always kept.
        query: Free-text description of the startup being looked for.

    Returns:
        The nearest matches sorted by funding, highest first, with undisclosed
        funding shown as the string 'Undisclosed'.
    """
    df_size = filter_df(df, 'size', '==', size)
    # NOTE(review): substring match, so 'B2C' also selects rows whose only
    # consumer-facing label is 'B2B2C' - confirm this is intended.
    df_target = filter_df(df_size, 'target', 'contains', target)

    meets_minimum = (df_target['raised'] >= raised) | (df_target['raised'] == 0)
    df_knn = search(df_target[meets_minimum], query)

    # Sorting is left for last: the nearest neighbours are picked by
    # similarity first and only then re-ordered by funding for display.
    df_knn = df_knn.sort_values('raised', ascending=False)
    df_knn['raised'] = df_knn['raised'].apply(_format_raised)

    return df_knn


def build_demo():
    """Assemble the Gradio Blocks UI and wire the search button to `greet`."""
    with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:
        gr.Markdown(
            """
            # Gradio with History
            """
        )
        # Input order here must match the parameter order of `greet`.
        size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+'], value='11-50', label='size')
        target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], value='B2B', label='target')
        # `step` is the Slider keyword; the former `step_size` was ignored with a warning.
        raised = gr.Slider(0, 20, value=5, step=1, label="Minimum raising (in Millions)")
        query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')
        btn = gr.Button(value="Search for a Startup")
        output1 = gr.DataFrame(label='value')
        btn.click(greet, [size, target, raised, query], [output1])
    return demo


# True both in a notebook kernel and when run as a script; importing the
# functions above from elsewhere does not start the web server.
if __name__ == '__main__':
    demo = build_demo()
    demo.launch(share=False)