Michelangiolo commited on
Commit
3292cb1
1 Parent(s): 3d4e8d2
_test.ipynb DELETED
@@ -1,215 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "code",
5
- "execution_count": null,
6
- "metadata": {},
7
- "outputs": [],
8
- "source": [
9
- "import os\n",
10
- "# os.system('pip install openpyxl')\n",
11
- "# os.system('pip install sentence-transformers')\n",
12
- "import pandas as pd\n",
13
- "import gradio as gr\n",
14
- "from sentence_transformers import SentenceTransformer\n",
15
- "\n",
16
- "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
17
- "\n",
18
- "df = pd.read_parquet('df_encoded3.parquet')\n",
19
- "df['tags'] = df['tags'].apply(lambda x : str(x))\n",
20
- "def parse_raised(x):\n",
21
- " if x == 'Undisclosed':\n",
22
- " return 0\n",
23
- " else: \n",
24
- " quantifier = x[-1]\n",
25
- " x = float(x[1:-1])\n",
26
- " if quantifier == 'K':\n",
27
- " return x/1000\n",
28
- " elif quantifier == 'M':\n",
29
- " return x\n",
30
- "df['raised'] = df['raised'].apply(lambda x : parse_raised(x))\n",
31
- "df['stage'] = df['stage'].apply(lambda x : x.lower())\n",
32
- "df = df.reset_index(drop=True)\n",
33
- "\n",
34
- "from sklearn.neighbors import NearestNeighbors\n",
35
- "import pandas as pd\n",
36
- "from sentence_transformers import SentenceTransformer\n",
37
- "\n",
38
- "nbrs = NearestNeighbors(n_neighbors=5000, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
39
- "\n",
40
- "def search(df, query):\n",
41
- " product = model.encode(query).tolist()\n",
42
- " # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n",
43
- "\n",
44
- " #prepare model\n",
45
- " # \n",
46
- " distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n",
47
- "\n",
48
- " #print out the description of every recommended product\n",
49
- " return df.iloc[list(indices)[0]][['name', 'raised', 'target', 'size', 'stage', 'country', 'source', 'description', 'tags']]\n",
50
- "\n",
51
- "def filter_df(df, column_name, filter_type, filter_value, minimum_acceptable_size=0):\n",
52
- " if filter_type == '==':\n",
53
- " df_filtered = df[df[column_name]==filter_value]\n",
54
- " elif filter_type == '>=':\n",
55
- " df_filtered = df[df[column_name]>=filter_value]\n",
56
- " elif filter_type == '<=':\n",
57
- " df_filtered = df[df[column_name]<=filter_value]\n",
58
- " elif filter_type == 'contains':\n",
59
- " df_filtered = df[df['target'].str.contains(filter_value)]\n",
60
- "\n",
61
- " if df_filtered.size >= minimum_acceptable_size:\n",
62
- " return df_filtered\n",
63
- " else:\n",
64
- " return df"
65
- ]
66
- },
67
- {
68
- "cell_type": "code",
69
- "execution_count": 75,
70
- "metadata": {},
71
- "outputs": [
72
- {
73
- "name": "stderr",
74
- "output_type": "stream",
75
- "text": [
76
- "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
77
- " warnings.warn(\n"
78
- ]
79
- },
80
- {
81
- "name": "stdout",
82
- "output_type": "stream",
83
- "text": [
84
- "Running on local URL: http://127.0.0.1:7909\n",
85
- "\n",
86
- "To create a public link, set `share=True` in `launch()`.\n"
87
- ]
88
- },
89
- {
90
- "data": {
91
- "text/html": [
92
- "<div><iframe src=\"http://127.0.0.1:7909/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
93
- ],
94
- "text/plain": [
95
- "<IPython.core.display.HTML object>"
96
- ]
97
- },
98
- "metadata": {},
99
- "output_type": "display_data"
100
- },
101
- {
102
- "data": {
103
- "text/plain": []
104
- },
105
- "execution_count": 75,
106
- "metadata": {},
107
- "output_type": "execute_result"
108
- },
109
- {
110
- "name": "stdout",
111
- "output_type": "stream",
112
- "text": [
113
- "459\n",
114
- "27\n"
115
- ]
116
- }
117
- ],
118
- "source": [
119
- "#the first module becomes text1, the second module file1\n",
120
- "def greet(size, target, stage, query): \n",
121
- " def raised_zero(x):\n",
122
- " if x == 0:\n",
123
- " return 'Undisclosed'\n",
124
- " else:\n",
125
- " return x\n",
126
- " df_knn = search(df, query)\n",
127
- " df_knn['raised'] = df_knn['raised'].apply(lambda x : raised_zero(x))\n",
128
- "\n",
129
- " df_size = filter_df(df_knn, 'size', '==', size, 1)\n",
130
- "\n",
131
- " if stage != 'ALL':\n",
132
- " df_stage = filter_df(df_size, 'stage', '==', stage.lower(), 1)\n",
133
- " else:\n",
134
- " #we bypass the filter\n",
135
- " df_stage = df_size\n",
136
- "\n",
137
- " print(df_stage.size)\n",
138
- "\n",
139
- " df_target = filter_df(df_stage, 'target', 'contains', target, 1)\n",
140
- " \n",
141
- " # display(df_stage)\n",
142
- " # df_raised = df_target[(df_target['raised'] >= raised) | (df_target['raised'] == 0)]\n",
143
- "\n",
144
- " #we live the sorting for last\n",
145
- " return df_target[0:100] #.sort_values('raised', ascending=False)\n",
146
- "\n",
147
- "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
148
- " gr.Markdown(\n",
149
- " \"\"\"\n",
150
- " # Startup Search Engine\n",
151
- " \"\"\"\n",
152
- " )\n",
153
- " size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+', '11-500+'], multiselect=False, value='11-500+', label='size')\n",
154
- " target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], multiselect=False, value='B2B', label='target')\n",
155
- " stage = gr.Radio(['pre-seed', 'A', 'B', 'C', 'ALL'], multiselect=False, value='ALL', label='stage')\n",
156
- " # raised = gr.Slider(0, 20, value=5, step_size=1, label=\"Minimum raising (in Millions)\")\n",
157
- " query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')\n",
158
- " btn = gr.Button(value=\"Search for a Startup\")\n",
159
- " output1 = gr.DataFrame(label='value')\n",
160
- " # btn.click(greet, inputs='text', outputs=['dataframe'])\n",
161
- " btn.click(greet, [size, target, stage, query], [output1])\n",
162
- "demo.launch(share=False)"
163
- ]
164
- },
165
- {
166
- "cell_type": "code",
167
- "execution_count": null,
168
- "metadata": {},
169
- "outputs": [],
170
- "source": [
171
- "# Define database of sentences\n",
172
- "sentences = pd.Series(['The quick brown fox jumps over the lazy dog',\n",
173
- " 'A quick brown dog jumps over the lazy fox',\n",
174
- " 'The lazy dog jumps over the quick brown fox',\n",
175
- " 'The quick brown fox jumps over the lazy cat',\n",
176
- " 'The quick brown cat jumps over the lazy dog'])\n",
177
- "\n",
178
- "# Encode sentences\n",
179
- "sentence_embeddings = model.encode(sentences)\n",
180
- "\n",
181
- "# Define query sentence\n",
182
- "query = 'A lazy dog jumps over the quick brown fox'\n",
183
- "\n",
184
- "# Encode query\n",
185
- "query_embedding = model.encode(query)\n",
186
- "\n",
187
- "# Search for similar sentences\n",
188
- "cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)\n",
189
- "most_similar_sentence = sentences[cosine_scores.argmax()]"
190
- ]
191
- }
192
- ],
193
- "metadata": {
194
- "kernelspec": {
195
- "display_name": "Python 3",
196
- "language": "python",
197
- "name": "python3"
198
- },
199
- "language_info": {
200
- "codemirror_mode": {
201
- "name": "ipython",
202
- "version": 3
203
- },
204
- "file_extension": ".py",
205
- "mimetype": "text/x-python",
206
- "name": "python",
207
- "nbconvert_exporter": "python",
208
- "pygments_lexer": "ipython3",
209
- "version": "3.9.13"
210
- },
211
- "orig_nbformat": 4
212
- },
213
- "nbformat": 4,
214
- "nbformat_minor": 2
215
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a.ipynb DELETED
File without changes
competitor_analysis.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ The aging population is rapidly growing and there is an increasing demand for solutions to address the unique challenges and opportunities it presents. There are a number of startups that are focused on providing solutions to this market, such as Assured Allies, AgeRate, AgeNation, Modern Age, Age Labs, Klogene Therapeutics, Inc., Spring Discovery, Worthright, and RetroBrain R&D GmbH. The majority of these companies are at the pre-seed stage and are focused on providing health and wellness solutions, drug discovery, life insurance, and therapeutic video games for the aging population.
2
+ Each of these startups offers a unique solution to the aging population, either through providing health and wellness solutions, drug discovery, life insurance, or therapeutic video games. Assured Allies, AgeRate, and AgeNation are all focused on providing health and wellness solutions, while Age Labs and Klogene Therapeutics, Inc. are focused on drug discovery. Worthright is focused on providing life insurance solutions, and RetroBrain R&D GmbH is focused on providing therapeutic video games. Spring Discovery is focused on both drug discovery and targeting the biological processes of aging itself.
3
+ Overall, these startups are providing a range of solutions to the aging population, as well as offering the potential to develop new treatments and therapies. However, they are all in the early stages of development and have yet to prove themselves in the market. As such, it will be important to monitor their progress and evaluate their potential impact on the aging population.
competitory_analysis.ipynb ADDED
@@ -0,0 +1,1496 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 2,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "data": {
10
+ "application/vnd.plotly.v1+json": {
11
+ "config": {
12
+ "plotlyServerURL": "https://plot.ly"
13
+ },
14
+ "data": [
15
+ {
16
+ "hovertemplate": "x=%{x}<br>y=%{y}<extra></extra>",
17
+ "legendgroup": "",
18
+ "marker": {
19
+ "color": "#636efa",
20
+ "symbol": "circle"
21
+ },
22
+ "mode": "markers",
23
+ "name": "",
24
+ "orientation": "v",
25
+ "showlegend": false,
26
+ "type": "scatter",
27
+ "x": [
28
+ 0.11065170952653092,
29
+ 0.9313424805150406,
30
+ -1.5996490940811126,
31
+ -0.8519844340509644,
32
+ -1.7340464584342041,
33
+ -0.3775897479971143,
34
+ 0.9342174794849762,
35
+ -0.3661428747413339,
36
+ -1.2932493735425519,
37
+ -0.49093114163634255,
38
+ -0.3551007466229788,
39
+ -1.5340231679496692,
40
+ 0.4078294367610518,
41
+ -2.1442638646046617,
42
+ 0.6231023249264844,
43
+ -0.6998619957260692,
44
+ -0.1113983771908388,
45
+ 0.7285629389163905,
46
+ 0.46044422901665105,
47
+ 0.6814905041672645,
48
+ 0.4219703127102215,
49
+ -0.7752652184393516,
50
+ 0.7955340314268295,
51
+ -0.7314030456466243,
52
+ 0.9872641829556011,
53
+ -0.6583629187263107,
54
+ 0.6047801828460544,
55
+ 0.20295411232272387,
56
+ -0.3072853007045659,
57
+ -0.04825357656986248,
58
+ 0.22843186056424186,
59
+ 0.3715182306968289,
60
+ -0.8177510455428024,
61
+ 0.42432092526509263,
62
+ 1.3846356216909304,
63
+ 0.12614132328028024,
64
+ 2.621758555667932,
65
+ -1.0520216457461542,
66
+ -1.9875946687784296,
67
+ 1.2126073567588975,
68
+ -0.005340800043309528,
69
+ 0.6425833097783461,
70
+ -0.23085575191583718,
71
+ -0.39870692645269185,
72
+ 0.22001362060737853,
73
+ 0.6935952715973245,
74
+ -0.23899657873918492,
75
+ -0.2647563455109995,
76
+ -0.1552225159983857,
77
+ 0.8367589770008363,
78
+ 0.6144945885021259,
79
+ -1.5671191790556918,
80
+ -0.18360560911852558,
81
+ 0.6053804394441732,
82
+ 0.6702519070882618,
83
+ -1.5371262060244244,
84
+ -1.8799101652646657,
85
+ 0.8274294657447646,
86
+ -0.9778838736457496,
87
+ -1.32563441413571,
88
+ -0.47131523078112636,
89
+ -0.8237529087079579,
90
+ 0.0749440530096387,
91
+ -1.1982510124495016,
92
+ -0.48032273082852944,
93
+ 1.6803302848658004,
94
+ -0.1877506957031164,
95
+ -0.21457440257322793,
96
+ -1.6357931534618344,
97
+ 0.06631255614130224,
98
+ 1.033698385736438,
99
+ 0.6557679210979364,
100
+ 0.906246158318645,
101
+ 0.6040398563966467,
102
+ 0.3778948128345762,
103
+ -0.4923550753337614,
104
+ 0.39072985960174966,
105
+ 0.9855721759276989,
106
+ -0.7409752742572114,
107
+ -1.2737238002887008,
108
+ 0.22940454505219351,
109
+ 0.7819262496258474,
110
+ -0.4288546165934158,
111
+ 0.8572089084430538,
112
+ -0.2903102705549142,
113
+ -0.7319677340190642,
114
+ 1.2303065042277,
115
+ -1.9725578060317988,
116
+ 0.32598692464265033,
117
+ -0.5876222492318771,
118
+ 0.8007952391605019,
119
+ -1.4990544921428144,
120
+ -0.8627915577138465,
121
+ -0.6765202184953598,
122
+ 1.1954685998825016,
123
+ 0.5950046597901215,
124
+ 1.975336422109746,
125
+ -1.5833977844592078,
126
+ 0.14195434583624839,
127
+ -0.35137002654826655
128
+ ],
129
+ "xaxis": "x",
130
+ "y": [
131
+ -0.4629101105537382,
132
+ -0.2328003707850657,
133
+ 0.08828254314043757,
134
+ 0.8482893235591087,
135
+ 0.0771552141212955,
136
+ 0.45420000337695093,
137
+ 0.6107289239044659,
138
+ 0.12055603761846795,
139
+ -0.7550448080765189,
140
+ -1.838932065060547,
141
+ 0.4806534709489732,
142
+ 1.151346161003326,
143
+ 0.2478735044883051,
144
+ 0.06641255531994601,
145
+ -0.5489097568589677,
146
+ -0.4973455322789025,
147
+ 0.4724629696550256,
148
+ 1.3080386645719073,
149
+ -1.0194257866836292,
150
+ 1.3661003420325573,
151
+ 1.4517050942871168,
152
+ 2.2756191195760964,
153
+ 0.6866903934626691,
154
+ 0.05090384642811877,
155
+ -1.042704891086457,
156
+ -0.3320134837963288,
157
+ -1.0765856360639627,
158
+ 0.6605128494068281,
159
+ -1.128281682848767,
160
+ -0.7553027387022911,
161
+ -0.29547549213277924,
162
+ -0.33700831200450676,
163
+ -0.10941772118430755,
164
+ 1.68310769353848,
165
+ -0.19229315857114024,
166
+ 0.8294415908956625,
167
+ -1.3417039289417374,
168
+ -0.7048263733247031,
169
+ 1.168681096473467,
170
+ -0.8932875274711288,
171
+ 1.5922472197868605,
172
+ -1.066334442512193,
173
+ 0.5498063882137004,
174
+ 1.7803337014200828,
175
+ 0.4327588386400595,
176
+ -1.82700153607581,
177
+ -0.2847320646428013,
178
+ -1.2718678778713373,
179
+ 0.030647100939450736,
180
+ -0.3726484952087042,
181
+ 1.24677793821802,
182
+ -0.9627886538790479,
183
+ 0.053176481171674006,
184
+ 0.21228159481404182,
185
+ 2.2210778206099895,
186
+ -0.3210863506096476,
187
+ -0.13099360273041932,
188
+ -1.3584896597178582,
189
+ 2.6607319250019947,
190
+ -0.41318978236361714,
191
+ -1.0836282188237547,
192
+ -0.4870997733370743,
193
+ -0.9613820399144345,
194
+ 0.8207835881444858,
195
+ 1.1732660854078711,
196
+ -0.0695355567682757,
197
+ -1.26447561478453,
198
+ -0.5190975876980604,
199
+ 0.4607032579572695,
200
+ -0.09378808026636432,
201
+ -0.12440072801979105,
202
+ 1.8745088793182847,
203
+ 0.454356329723802,
204
+ 1.359892435465479,
205
+ -0.01437712091152146,
206
+ -0.8529776587086099,
207
+ -0.4959037185693348,
208
+ -0.3351558645302966,
209
+ 0.041836528406660786,
210
+ 1.4252605845255253,
211
+ -0.07426595321119726,
212
+ 0.9202368920501296,
213
+ -0.5671337897282676,
214
+ -0.600754544014511,
215
+ 1.3359736900383594,
216
+ 0.16076219666418068,
217
+ 0.6872459440118796,
218
+ 0.7243820452672731,
219
+ 0.24446435572716743,
220
+ -1.1772300880788433,
221
+ 1.892655561835316,
222
+ 1.185630814982556,
223
+ -0.8623991303777663,
224
+ -1.5808521340820116,
225
+ 0.23459772035678836,
226
+ 0.4098229267414775,
227
+ -0.4550057901832996,
228
+ -0.2261330459822914,
229
+ 0.23013128978783395,
230
+ 0.7516276562909543
231
+ ],
232
+ "yaxis": "y"
233
+ },
234
+ {
235
+ "marker": {
236
+ "color": "red",
237
+ "size": 10
238
+ },
239
+ "mode": "markers",
240
+ "type": "scatter",
241
+ "x": [
242
+ -0.1552225159983857,
243
+ 1.975336422109746,
244
+ -1.2737238002887008,
245
+ -0.8627915577138465,
246
+ -1.6357931534618344,
247
+ -0.6583629187263107,
248
+ 0.8007952391605019,
249
+ -0.4923550753337614,
250
+ 0.14195434583624839,
251
+ 0.6231023249264844
252
+ ],
253
+ "y": [
254
+ 0.030647100939450736,
255
+ -0.4550057901832996,
256
+ 1.4252605845255253,
257
+ -0.8623991303777663,
258
+ 0.4607032579572695,
259
+ -0.3320134837963288,
260
+ 1.892655561835316,
261
+ -0.8529776587086099,
262
+ 0.23013128978783395,
263
+ -0.5489097568589677
264
+ ]
265
+ }
266
+ ],
267
+ "layout": {
268
+ "legend": {
269
+ "tracegroupgap": 0
270
+ },
271
+ "margin": {
272
+ "t": 60
273
+ },
274
+ "template": {
275
+ "data": {
276
+ "bar": [
277
+ {
278
+ "error_x": {
279
+ "color": "#2a3f5f"
280
+ },
281
+ "error_y": {
282
+ "color": "#2a3f5f"
283
+ },
284
+ "marker": {
285
+ "line": {
286
+ "color": "#E5ECF6",
287
+ "width": 0.5
288
+ },
289
+ "pattern": {
290
+ "fillmode": "overlay",
291
+ "size": 10,
292
+ "solidity": 0.2
293
+ }
294
+ },
295
+ "type": "bar"
296
+ }
297
+ ],
298
+ "barpolar": [
299
+ {
300
+ "marker": {
301
+ "line": {
302
+ "color": "#E5ECF6",
303
+ "width": 0.5
304
+ },
305
+ "pattern": {
306
+ "fillmode": "overlay",
307
+ "size": 10,
308
+ "solidity": 0.2
309
+ }
310
+ },
311
+ "type": "barpolar"
312
+ }
313
+ ],
314
+ "carpet": [
315
+ {
316
+ "aaxis": {
317
+ "endlinecolor": "#2a3f5f",
318
+ "gridcolor": "white",
319
+ "linecolor": "white",
320
+ "minorgridcolor": "white",
321
+ "startlinecolor": "#2a3f5f"
322
+ },
323
+ "baxis": {
324
+ "endlinecolor": "#2a3f5f",
325
+ "gridcolor": "white",
326
+ "linecolor": "white",
327
+ "minorgridcolor": "white",
328
+ "startlinecolor": "#2a3f5f"
329
+ },
330
+ "type": "carpet"
331
+ }
332
+ ],
333
+ "choropleth": [
334
+ {
335
+ "colorbar": {
336
+ "outlinewidth": 0,
337
+ "ticks": ""
338
+ },
339
+ "type": "choropleth"
340
+ }
341
+ ],
342
+ "contour": [
343
+ {
344
+ "colorbar": {
345
+ "outlinewidth": 0,
346
+ "ticks": ""
347
+ },
348
+ "colorscale": [
349
+ [
350
+ 0,
351
+ "#0d0887"
352
+ ],
353
+ [
354
+ 0.1111111111111111,
355
+ "#46039f"
356
+ ],
357
+ [
358
+ 0.2222222222222222,
359
+ "#7201a8"
360
+ ],
361
+ [
362
+ 0.3333333333333333,
363
+ "#9c179e"
364
+ ],
365
+ [
366
+ 0.4444444444444444,
367
+ "#bd3786"
368
+ ],
369
+ [
370
+ 0.5555555555555556,
371
+ "#d8576b"
372
+ ],
373
+ [
374
+ 0.6666666666666666,
375
+ "#ed7953"
376
+ ],
377
+ [
378
+ 0.7777777777777778,
379
+ "#fb9f3a"
380
+ ],
381
+ [
382
+ 0.8888888888888888,
383
+ "#fdca26"
384
+ ],
385
+ [
386
+ 1,
387
+ "#f0f921"
388
+ ]
389
+ ],
390
+ "type": "contour"
391
+ }
392
+ ],
393
+ "contourcarpet": [
394
+ {
395
+ "colorbar": {
396
+ "outlinewidth": 0,
397
+ "ticks": ""
398
+ },
399
+ "type": "contourcarpet"
400
+ }
401
+ ],
402
+ "heatmap": [
403
+ {
404
+ "colorbar": {
405
+ "outlinewidth": 0,
406
+ "ticks": ""
407
+ },
408
+ "colorscale": [
409
+ [
410
+ 0,
411
+ "#0d0887"
412
+ ],
413
+ [
414
+ 0.1111111111111111,
415
+ "#46039f"
416
+ ],
417
+ [
418
+ 0.2222222222222222,
419
+ "#7201a8"
420
+ ],
421
+ [
422
+ 0.3333333333333333,
423
+ "#9c179e"
424
+ ],
425
+ [
426
+ 0.4444444444444444,
427
+ "#bd3786"
428
+ ],
429
+ [
430
+ 0.5555555555555556,
431
+ "#d8576b"
432
+ ],
433
+ [
434
+ 0.6666666666666666,
435
+ "#ed7953"
436
+ ],
437
+ [
438
+ 0.7777777777777778,
439
+ "#fb9f3a"
440
+ ],
441
+ [
442
+ 0.8888888888888888,
443
+ "#fdca26"
444
+ ],
445
+ [
446
+ 1,
447
+ "#f0f921"
448
+ ]
449
+ ],
450
+ "type": "heatmap"
451
+ }
452
+ ],
453
+ "heatmapgl": [
454
+ {
455
+ "colorbar": {
456
+ "outlinewidth": 0,
457
+ "ticks": ""
458
+ },
459
+ "colorscale": [
460
+ [
461
+ 0,
462
+ "#0d0887"
463
+ ],
464
+ [
465
+ 0.1111111111111111,
466
+ "#46039f"
467
+ ],
468
+ [
469
+ 0.2222222222222222,
470
+ "#7201a8"
471
+ ],
472
+ [
473
+ 0.3333333333333333,
474
+ "#9c179e"
475
+ ],
476
+ [
477
+ 0.4444444444444444,
478
+ "#bd3786"
479
+ ],
480
+ [
481
+ 0.5555555555555556,
482
+ "#d8576b"
483
+ ],
484
+ [
485
+ 0.6666666666666666,
486
+ "#ed7953"
487
+ ],
488
+ [
489
+ 0.7777777777777778,
490
+ "#fb9f3a"
491
+ ],
492
+ [
493
+ 0.8888888888888888,
494
+ "#fdca26"
495
+ ],
496
+ [
497
+ 1,
498
+ "#f0f921"
499
+ ]
500
+ ],
501
+ "type": "heatmapgl"
502
+ }
503
+ ],
504
+ "histogram": [
505
+ {
506
+ "marker": {
507
+ "pattern": {
508
+ "fillmode": "overlay",
509
+ "size": 10,
510
+ "solidity": 0.2
511
+ }
512
+ },
513
+ "type": "histogram"
514
+ }
515
+ ],
516
+ "histogram2d": [
517
+ {
518
+ "colorbar": {
519
+ "outlinewidth": 0,
520
+ "ticks": ""
521
+ },
522
+ "colorscale": [
523
+ [
524
+ 0,
525
+ "#0d0887"
526
+ ],
527
+ [
528
+ 0.1111111111111111,
529
+ "#46039f"
530
+ ],
531
+ [
532
+ 0.2222222222222222,
533
+ "#7201a8"
534
+ ],
535
+ [
536
+ 0.3333333333333333,
537
+ "#9c179e"
538
+ ],
539
+ [
540
+ 0.4444444444444444,
541
+ "#bd3786"
542
+ ],
543
+ [
544
+ 0.5555555555555556,
545
+ "#d8576b"
546
+ ],
547
+ [
548
+ 0.6666666666666666,
549
+ "#ed7953"
550
+ ],
551
+ [
552
+ 0.7777777777777778,
553
+ "#fb9f3a"
554
+ ],
555
+ [
556
+ 0.8888888888888888,
557
+ "#fdca26"
558
+ ],
559
+ [
560
+ 1,
561
+ "#f0f921"
562
+ ]
563
+ ],
564
+ "type": "histogram2d"
565
+ }
566
+ ],
567
+ "histogram2dcontour": [
568
+ {
569
+ "colorbar": {
570
+ "outlinewidth": 0,
571
+ "ticks": ""
572
+ },
573
+ "colorscale": [
574
+ [
575
+ 0,
576
+ "#0d0887"
577
+ ],
578
+ [
579
+ 0.1111111111111111,
580
+ "#46039f"
581
+ ],
582
+ [
583
+ 0.2222222222222222,
584
+ "#7201a8"
585
+ ],
586
+ [
587
+ 0.3333333333333333,
588
+ "#9c179e"
589
+ ],
590
+ [
591
+ 0.4444444444444444,
592
+ "#bd3786"
593
+ ],
594
+ [
595
+ 0.5555555555555556,
596
+ "#d8576b"
597
+ ],
598
+ [
599
+ 0.6666666666666666,
600
+ "#ed7953"
601
+ ],
602
+ [
603
+ 0.7777777777777778,
604
+ "#fb9f3a"
605
+ ],
606
+ [
607
+ 0.8888888888888888,
608
+ "#fdca26"
609
+ ],
610
+ [
611
+ 1,
612
+ "#f0f921"
613
+ ]
614
+ ],
615
+ "type": "histogram2dcontour"
616
+ }
617
+ ],
618
+ "mesh3d": [
619
+ {
620
+ "colorbar": {
621
+ "outlinewidth": 0,
622
+ "ticks": ""
623
+ },
624
+ "type": "mesh3d"
625
+ }
626
+ ],
627
+ "parcoords": [
628
+ {
629
+ "line": {
630
+ "colorbar": {
631
+ "outlinewidth": 0,
632
+ "ticks": ""
633
+ }
634
+ },
635
+ "type": "parcoords"
636
+ }
637
+ ],
638
+ "pie": [
639
+ {
640
+ "automargin": true,
641
+ "type": "pie"
642
+ }
643
+ ],
644
+ "scatter": [
645
+ {
646
+ "fillpattern": {
647
+ "fillmode": "overlay",
648
+ "size": 10,
649
+ "solidity": 0.2
650
+ },
651
+ "type": "scatter"
652
+ }
653
+ ],
654
+ "scatter3d": [
655
+ {
656
+ "line": {
657
+ "colorbar": {
658
+ "outlinewidth": 0,
659
+ "ticks": ""
660
+ }
661
+ },
662
+ "marker": {
663
+ "colorbar": {
664
+ "outlinewidth": 0,
665
+ "ticks": ""
666
+ }
667
+ },
668
+ "type": "scatter3d"
669
+ }
670
+ ],
671
+ "scattercarpet": [
672
+ {
673
+ "marker": {
674
+ "colorbar": {
675
+ "outlinewidth": 0,
676
+ "ticks": ""
677
+ }
678
+ },
679
+ "type": "scattercarpet"
680
+ }
681
+ ],
682
+ "scattergeo": [
683
+ {
684
+ "marker": {
685
+ "colorbar": {
686
+ "outlinewidth": 0,
687
+ "ticks": ""
688
+ }
689
+ },
690
+ "type": "scattergeo"
691
+ }
692
+ ],
693
+ "scattergl": [
694
+ {
695
+ "marker": {
696
+ "colorbar": {
697
+ "outlinewidth": 0,
698
+ "ticks": ""
699
+ }
700
+ },
701
+ "type": "scattergl"
702
+ }
703
+ ],
704
+ "scattermapbox": [
705
+ {
706
+ "marker": {
707
+ "colorbar": {
708
+ "outlinewidth": 0,
709
+ "ticks": ""
710
+ }
711
+ },
712
+ "type": "scattermapbox"
713
+ }
714
+ ],
715
+ "scatterpolar": [
716
+ {
717
+ "marker": {
718
+ "colorbar": {
719
+ "outlinewidth": 0,
720
+ "ticks": ""
721
+ }
722
+ },
723
+ "type": "scatterpolar"
724
+ }
725
+ ],
726
+ "scatterpolargl": [
727
+ {
728
+ "marker": {
729
+ "colorbar": {
730
+ "outlinewidth": 0,
731
+ "ticks": ""
732
+ }
733
+ },
734
+ "type": "scatterpolargl"
735
+ }
736
+ ],
737
+ "scatterternary": [
738
+ {
739
+ "marker": {
740
+ "colorbar": {
741
+ "outlinewidth": 0,
742
+ "ticks": ""
743
+ }
744
+ },
745
+ "type": "scatterternary"
746
+ }
747
+ ],
748
+ "surface": [
749
+ {
750
+ "colorbar": {
751
+ "outlinewidth": 0,
752
+ "ticks": ""
753
+ },
754
+ "colorscale": [
755
+ [
756
+ 0,
757
+ "#0d0887"
758
+ ],
759
+ [
760
+ 0.1111111111111111,
761
+ "#46039f"
762
+ ],
763
+ [
764
+ 0.2222222222222222,
765
+ "#7201a8"
766
+ ],
767
+ [
768
+ 0.3333333333333333,
769
+ "#9c179e"
770
+ ],
771
+ [
772
+ 0.4444444444444444,
773
+ "#bd3786"
774
+ ],
775
+ [
776
+ 0.5555555555555556,
777
+ "#d8576b"
778
+ ],
779
+ [
780
+ 0.6666666666666666,
781
+ "#ed7953"
782
+ ],
783
+ [
784
+ 0.7777777777777778,
785
+ "#fb9f3a"
786
+ ],
787
+ [
788
+ 0.8888888888888888,
789
+ "#fdca26"
790
+ ],
791
+ [
792
+ 1,
793
+ "#f0f921"
794
+ ]
795
+ ],
796
+ "type": "surface"
797
+ }
798
+ ],
799
+ "table": [
800
+ {
801
+ "cells": {
802
+ "fill": {
803
+ "color": "#EBF0F8"
804
+ },
805
+ "line": {
806
+ "color": "white"
807
+ }
808
+ },
809
+ "header": {
810
+ "fill": {
811
+ "color": "#C8D4E3"
812
+ },
813
+ "line": {
814
+ "color": "white"
815
+ }
816
+ },
817
+ "type": "table"
818
+ }
819
+ ]
820
+ },
821
+ "layout": {
822
+ "annotationdefaults": {
823
+ "arrowcolor": "#2a3f5f",
824
+ "arrowhead": 0,
825
+ "arrowwidth": 1
826
+ },
827
+ "autotypenumbers": "strict",
828
+ "coloraxis": {
829
+ "colorbar": {
830
+ "outlinewidth": 0,
831
+ "ticks": ""
832
+ }
833
+ },
834
+ "colorscale": {
835
+ "diverging": [
836
+ [
837
+ 0,
838
+ "#8e0152"
839
+ ],
840
+ [
841
+ 0.1,
842
+ "#c51b7d"
843
+ ],
844
+ [
845
+ 0.2,
846
+ "#de77ae"
847
+ ],
848
+ [
849
+ 0.3,
850
+ "#f1b6da"
851
+ ],
852
+ [
853
+ 0.4,
854
+ "#fde0ef"
855
+ ],
856
+ [
857
+ 0.5,
858
+ "#f7f7f7"
859
+ ],
860
+ [
861
+ 0.6,
862
+ "#e6f5d0"
863
+ ],
864
+ [
865
+ 0.7,
866
+ "#b8e186"
867
+ ],
868
+ [
869
+ 0.8,
870
+ "#7fbc41"
871
+ ],
872
+ [
873
+ 0.9,
874
+ "#4d9221"
875
+ ],
876
+ [
877
+ 1,
878
+ "#276419"
879
+ ]
880
+ ],
881
+ "sequential": [
882
+ [
883
+ 0,
884
+ "#0d0887"
885
+ ],
886
+ [
887
+ 0.1111111111111111,
888
+ "#46039f"
889
+ ],
890
+ [
891
+ 0.2222222222222222,
892
+ "#7201a8"
893
+ ],
894
+ [
895
+ 0.3333333333333333,
896
+ "#9c179e"
897
+ ],
898
+ [
899
+ 0.4444444444444444,
900
+ "#bd3786"
901
+ ],
902
+ [
903
+ 0.5555555555555556,
904
+ "#d8576b"
905
+ ],
906
+ [
907
+ 0.6666666666666666,
908
+ "#ed7953"
909
+ ],
910
+ [
911
+ 0.7777777777777778,
912
+ "#fb9f3a"
913
+ ],
914
+ [
915
+ 0.8888888888888888,
916
+ "#fdca26"
917
+ ],
918
+ [
919
+ 1,
920
+ "#f0f921"
921
+ ]
922
+ ],
923
+ "sequentialminus": [
924
+ [
925
+ 0,
926
+ "#0d0887"
927
+ ],
928
+ [
929
+ 0.1111111111111111,
930
+ "#46039f"
931
+ ],
932
+ [
933
+ 0.2222222222222222,
934
+ "#7201a8"
935
+ ],
936
+ [
937
+ 0.3333333333333333,
938
+ "#9c179e"
939
+ ],
940
+ [
941
+ 0.4444444444444444,
942
+ "#bd3786"
943
+ ],
944
+ [
945
+ 0.5555555555555556,
946
+ "#d8576b"
947
+ ],
948
+ [
949
+ 0.6666666666666666,
950
+ "#ed7953"
951
+ ],
952
+ [
953
+ 0.7777777777777778,
954
+ "#fb9f3a"
955
+ ],
956
+ [
957
+ 0.8888888888888888,
958
+ "#fdca26"
959
+ ],
960
+ [
961
+ 1,
962
+ "#f0f921"
963
+ ]
964
+ ]
965
+ },
966
+ "colorway": [
967
+ "#636efa",
968
+ "#EF553B",
969
+ "#00cc96",
970
+ "#ab63fa",
971
+ "#FFA15A",
972
+ "#19d3f3",
973
+ "#FF6692",
974
+ "#B6E880",
975
+ "#FF97FF",
976
+ "#FECB52"
977
+ ],
978
+ "font": {
979
+ "color": "#2a3f5f"
980
+ },
981
+ "geo": {
982
+ "bgcolor": "white",
983
+ "lakecolor": "white",
984
+ "landcolor": "#E5ECF6",
985
+ "showlakes": true,
986
+ "showland": true,
987
+ "subunitcolor": "white"
988
+ },
989
+ "hoverlabel": {
990
+ "align": "left"
991
+ },
992
+ "hovermode": "closest",
993
+ "mapbox": {
994
+ "style": "light"
995
+ },
996
+ "paper_bgcolor": "white",
997
+ "plot_bgcolor": "#E5ECF6",
998
+ "polar": {
999
+ "angularaxis": {
1000
+ "gridcolor": "white",
1001
+ "linecolor": "white",
1002
+ "ticks": ""
1003
+ },
1004
+ "bgcolor": "#E5ECF6",
1005
+ "radialaxis": {
1006
+ "gridcolor": "white",
1007
+ "linecolor": "white",
1008
+ "ticks": ""
1009
+ }
1010
+ },
1011
+ "scene": {
1012
+ "xaxis": {
1013
+ "backgroundcolor": "#E5ECF6",
1014
+ "gridcolor": "white",
1015
+ "gridwidth": 2,
1016
+ "linecolor": "white",
1017
+ "showbackground": true,
1018
+ "ticks": "",
1019
+ "zerolinecolor": "white"
1020
+ },
1021
+ "yaxis": {
1022
+ "backgroundcolor": "#E5ECF6",
1023
+ "gridcolor": "white",
1024
+ "gridwidth": 2,
1025
+ "linecolor": "white",
1026
+ "showbackground": true,
1027
+ "ticks": "",
1028
+ "zerolinecolor": "white"
1029
+ },
1030
+ "zaxis": {
1031
+ "backgroundcolor": "#E5ECF6",
1032
+ "gridcolor": "white",
1033
+ "gridwidth": 2,
1034
+ "linecolor": "white",
1035
+ "showbackground": true,
1036
+ "ticks": "",
1037
+ "zerolinecolor": "white"
1038
+ }
1039
+ },
1040
+ "shapedefaults": {
1041
+ "line": {
1042
+ "color": "#2a3f5f"
1043
+ }
1044
+ },
1045
+ "ternary": {
1046
+ "aaxis": {
1047
+ "gridcolor": "white",
1048
+ "linecolor": "white",
1049
+ "ticks": ""
1050
+ },
1051
+ "baxis": {
1052
+ "gridcolor": "white",
1053
+ "linecolor": "white",
1054
+ "ticks": ""
1055
+ },
1056
+ "bgcolor": "#E5ECF6",
1057
+ "caxis": {
1058
+ "gridcolor": "white",
1059
+ "linecolor": "white",
1060
+ "ticks": ""
1061
+ }
1062
+ },
1063
+ "title": {
1064
+ "x": 0.05
1065
+ },
1066
+ "xaxis": {
1067
+ "automargin": true,
1068
+ "gridcolor": "white",
1069
+ "linecolor": "white",
1070
+ "ticks": "",
1071
+ "title": {
1072
+ "standoff": 15
1073
+ },
1074
+ "zerolinecolor": "white",
1075
+ "zerolinewidth": 2
1076
+ },
1077
+ "yaxis": {
1078
+ "automargin": true,
1079
+ "gridcolor": "white",
1080
+ "linecolor": "white",
1081
+ "ticks": "",
1082
+ "title": {
1083
+ "standoff": 15
1084
+ },
1085
+ "zerolinecolor": "white",
1086
+ "zerolinewidth": 2
1087
+ }
1088
+ }
1089
+ },
1090
+ "xaxis": {
1091
+ "anchor": "y",
1092
+ "domain": [
1093
+ 0,
1094
+ 1
1095
+ ],
1096
+ "title": {
1097
+ "text": "x"
1098
+ }
1099
+ },
1100
+ "yaxis": {
1101
+ "anchor": "x",
1102
+ "domain": [
1103
+ 0,
1104
+ 1
1105
+ ],
1106
+ "title": {
1107
+ "text": "y"
1108
+ }
1109
+ }
1110
+ }
1111
+ }
1112
+ },
1113
+ "metadata": {},
1114
+ "output_type": "display_data"
1115
+ }
1116
+ ],
1117
+ "source": [
1118
+ "import numpy as np\n",
1119
+ "import plotly.express as px\n",
1120
+ "\n",
1121
+ "def farthest_first_traversal(points, k, return_type='indices'):\n",
1122
+ " if return_type == 'values': \n",
1123
+ " # randomly select first point from the dataset\n",
1124
+ " centers = [points[np.random.randint(0, len(points))]]\n",
1125
+ " \n",
1126
+ " while len(centers) < k:\n",
1127
+ " # compute distances from all points to the selected centers\n",
1128
+ " distances = np.array([min([np.linalg.norm(p-c) for c in centers]) for p in points])\n",
1129
+ " \n",
1130
+ " # select the point farthest away from the centers\n",
1131
+ " farthest = points[np.argmax(distances)]\n",
1132
+ " \n",
1133
+ " # add the farthest point to the selected centers\n",
1134
+ " centers.append(farthest)\n",
1135
+ " \n",
1136
+ " return np.array(centers) #given a list, directly returns values\n",
1137
+ "\n",
1138
+ " elif return_type == 'indices':\n",
1139
+ " # randomly select first point from the dataset\n",
1140
+ " centers = [np.random.randint(0, len(points))]\n",
1141
+ " indices = [centers[0]]\n",
1142
+ " \n",
1143
+ " while len(centers) < k:\n",
1144
+ " # compute distances from all points to the selected centers\n",
1145
+ " distances = np.array([min([np.linalg.norm(points[p]-points[c]) for c in centers]) for p in range(len(points))])\n",
1146
+ " \n",
1147
+ " # select the point farthest away from the centers\n",
1148
+ " farthest = np.argmax(distances)\n",
1149
+ " \n",
1150
+ " # add the farthest point to the selected centers\n",
1151
+ " centers.append(farthest)\n",
1152
+ " indices.append(farthest)\n",
1153
+ " \n",
1154
+ " return np.array(indices) #given a list, returns the indices of the list\n",
1155
+ "\n",
1156
+ "# generate sample data\n",
1157
+ "# np.random.seed(42)\n",
1158
+ "points = np.random.randn(100, 20) #last param is the n. dimensions\n",
1159
+ "\n",
1160
+ "centers = farthest_first_traversal(points, k=10, return_type='values')\n",
1161
+ "# visualize results using Plotly\n",
1162
+ "fig = px.scatter(x=points[:,0], y=points[:,1])\n",
1163
+ "fig.add_scatter(x=centers[:,0], y=centers[:,1], mode='markers', marker=dict(size=10, color='red'))\n",
1164
+ "fig.show()"
1165
+ ]
1166
+ },
1167
+ {
1168
+ "cell_type": "code",
1169
+ "execution_count": 1,
1170
+ "metadata": {},
1171
+ "outputs": [
1172
+ {
1173
+ "name": "stderr",
1174
+ "output_type": "stream",
1175
+ "text": [
1176
+ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\onnx\\_internal\\_beartype.py:30: UserWarning: module 'beartype.roar' has no attribute 'BeartypeDecorHintPep585DeprecationWarning'\n",
1177
+ " warnings.warn(f\"{e}\")\n"
1178
+ ]
1179
+ }
1180
+ ],
1181
+ "source": [
1182
+ "import os\n",
1183
+ "# os.system('pip install openpyxl')\n",
1184
+ "# os.system('pip install sentence-transformers')\n",
1185
+ "import pandas as pd\n",
1186
+ "import gradio as gr\n",
1187
+ "from sentence_transformers import SentenceTransformer\n",
1188
+ "\n",
1189
+ "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n",
1190
+ "\n",
1191
+ "df = pd.read_parquet('df_encoded3.parquet')\n",
1192
+ "df['tags'] = df['tags'].apply(lambda x : str(x))\n",
1193
+ "def parse_raised(x):\n",
1194
+ " if x == 'Undisclosed':\n",
1195
+ " return 0\n",
1196
+ " else: \n",
1197
+ " quantifier = x[-1]\n",
1198
+ " x = float(x[1:-1])\n",
1199
+ " if quantifier == 'K':\n",
1200
+ " return x/1000\n",
1201
+ " elif quantifier == 'M':\n",
1202
+ " return x\n",
1203
+ "df['raised'] = df['raised'].apply(lambda x : parse_raised(x))\n",
1204
+ "df['stage'] = df['stage'].apply(lambda x : x.lower())\n",
1205
+ "df = df.reset_index(drop=True)\n",
1206
+ "\n",
1207
+ "from sklearn.neighbors import NearestNeighbors\n",
1208
+ "import pandas as pd\n",
1209
+ "from sentence_transformers import SentenceTransformer\n",
1210
+ "\n",
1211
+ "nbrs = NearestNeighbors(n_neighbors=5000, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n",
1212
+ "\n",
1213
+ "def search(df, query):\n",
1214
+ " product = model.encode(query).tolist()\n",
1215
+ " # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n",
1216
+ "\n",
1217
+ " #prepare model\n",
1218
+ " # \n",
1219
+ " distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n",
1220
+ "\n",
1221
+ " #print out the description of every recommended product\n",
1222
+ " return df.iloc[list(indices)[0]][['name', 'raised', 'target', 'size', 'stage', 'country', 'source', 'description', 'tags', 'text_vector_']]\n",
1223
+ "\n",
1224
+ "def filter_df(df, column_name, filter_type, filter_value, minimum_acceptable_size=0):\n",
1225
+ " if filter_type == '==':\n",
1226
+ " df_filtered = df[df[column_name]==filter_value]\n",
1227
+ " elif filter_type == '>=':\n",
1228
+ " df_filtered = df[df[column_name]>=filter_value]\n",
1229
+ " elif filter_type == '<=':\n",
1230
+ " df_filtered = df[df[column_name]<=filter_value]\n",
1231
+ " elif filter_type == 'contains':\n",
1232
+ " df_filtered = df[df[column_name].str.contains(filter_value)]\n",
1233
+ "\n",
1234
+ " if df_filtered.size >= minimum_acceptable_size:\n",
1235
+ " return df_filtered\n",
1236
+ " else:\n",
1237
+ " return df\n",
1238
+ "\n",
1239
+ "import pandas as pd\n",
1240
+ "import numpy as np\n",
1241
+ "from sentence_transformers import SentenceTransformer\n",
1242
+ "from sklearn.metrics.pairwise import cosine_similarity\n",
1243
+ "\n",
1244
+ "def score_filter(df, query, min_score):\n",
1245
+ " # Define function to compute cosine similarity between two vectors\n",
1246
+ " def cosine_sim(query, vector):\n",
1247
+ " return cosine_similarity([query], [vector])[0][0]\n",
1248
+ "\n",
1249
+ " # df_results = search(df, 'age reversing')[0:50]\n",
1250
+ " vector_col = np.array(df['text_vector_'].tolist())\n",
1251
+ "\n",
1252
+ " # Define query vector\n",
1253
+ " query = model.encode([query])[0]\n",
1254
+ "\n",
1255
+ " # Compute cosine similarity between query vector and every sample vector\n",
1256
+ " df['similarity'] = np.apply_along_axis(cosine_sim, 1, vector_col, query)\n",
1257
+ " df = df[df['similarity']>=min_score]\n",
1258
+ " return df"
1259
+ ]
1260
+ },
1261
+ {
1262
+ "cell_type": "code",
1263
+ "execution_count": 4,
1264
+ "metadata": {},
1265
+ "outputs": [
1266
+ {
1267
+ "data": {
1268
+ "text/plain": [
1269
+ "array([['Klogene Therapeutics, Inc.',\n",
1270
+ " 'Prevention and treatment of age related diseases'],\n",
1271
+ " ['Inverse',\n",
1272
+ " 'Inverse is changing the way women all around the world condition and care for their hair.'],\n",
1273
+ " ['AGELON М',\n",
1274
+ " 'Agelon M is the service of online investigations based on automated targeting, verification and surveying respondents in social networks.'],\n",
1275
+ " ['Age of Learning',\n",
1276
+ " 'Age of Learning blends education best practices, innovative technology, and insightful creativity to bring learning to life for children across the U.S. and around the world.'],\n",
1277
+ " ['Aprilage Inc',\n",
1278
+ " 'Seeing IS believing! Aprilage develops visualization software that shows people their \"future self\" and how their lifestyle of today will affect how they will look as they age. Our software, APRIL®, is currently used by 500 health providers, educators and insurers in more than 25 countries as a tool for health education about chronic disease prevention and behavior modification. APRIL® helps them demonstrate to their patients and clients the consequences of certain health and lifestyle choices (e.g. obesity, smoking, heavy sun exposure). Our clients tell us it works because it uses the best health motivator ever created – their own appearance. Two new APRIL® products are in final development for market launch - one for those engaged in the prevention and education about obesity, and the other for those in the aesthetics markets (plastic surgeons, dermatologists, and cosmetic companies).']],\n",
1279
+ " dtype=object)"
1280
+ ]
1281
+ },
1282
+ "execution_count": 4,
1283
+ "metadata": {},
1284
+ "output_type": "execute_result"
1285
+ }
1286
+ ],
1287
+ "source": [
1288
+ "df_results = search(df, 'age-reversing')[0:20]\n",
1289
+ "points = df_results['text_vector_'].values\n",
1290
+ "indices = farthest_first_traversal(points, k=5, return_type='indices')\n",
1291
+ "df_results.iloc[indices][['name', 'description']].values"
1292
+ ]
1293
+ },
1294
+ {
1295
+ "cell_type": "code",
1296
+ "execution_count": 48,
1297
+ "metadata": {},
1298
+ "outputs": [],
1299
+ "source": [
1300
+ "import requests\n",
1301
+ "\n",
1302
+ "def gpt3_question(api_key, prompt):\n",
1303
+ " api_endpoint = \"https://api.openai.com/v1/engines/text-davinci-003/completions\"\n",
1304
+ " headers = {\n",
1305
+ " \"Content-Type\": \"application/json\",\n",
1306
+ " \"Authorization\": f\"Bearer {api_key}\"\n",
1307
+ " }\n",
1308
+ " data = {\n",
1309
+ " \"prompt\": prompt,\n",
1310
+ " \"max_tokens\": 500,\n",
1311
+ " \"temperature\": 0.7\n",
1312
+ " }\n",
1313
+ " print('sending request')\n",
1314
+ " response = requests.post(api_endpoint, headers=headers, json=data)\n",
1315
+ " print(response.text)\n",
1316
+ " generated_text = response.json()[\"choices\"][0][\"text\"]\n",
1317
+ "\n",
1318
+ " return generated_text\n",
1319
+ "\n",
1320
+ "def competitor_analysis_foo(startup_array, max_paragraphs):\n",
1321
+ " prompt = f\"\"\"\n",
1322
+ " {str(startup_array)}\n",
1323
+ " This is a list of startups in the following format: [name, stage, description]:\n",
1324
+ "\n",
1325
+ " Write a {max_paragraphs} paragraph competitors analysis based on this data. Do not name the paragraphs.\n",
1326
+ " \"\"\"\n",
1327
+ " # the OpenAI key is read from the OPENAI_API_KEY environment variable; never commit secrets to the notebook\n",
1328
+ " response = gpt3_question(os.environ['OPENAI_API_KEY'], prompt)\n",
1329
+ "\n",
1330
+ " for x in range(10):\n",
1331
+ " response = response.replace(f'Paragraph {x}:', '')\n",
1332
+ " response = response.replace(f'Paragraph {x}', '')\n",
1333
+ " response = response.replace('\\n\\n', '\\n').strip()\n",
1334
+ "\n",
1335
+ " # with open('competitor_analysis.txt', 'w') as file:\n",
1336
+ " # file.write(response)\n",
1337
+ " return response"
1338
+ ]
1339
+ },
1340
+ {
1341
+ "cell_type": "code",
1342
+ "execution_count": 51,
1343
+ "metadata": {},
1344
+ "outputs": [
1345
+ {
1346
+ "name": "stderr",
1347
+ "output_type": "stream",
1348
+ "text": [
1349
+ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n",
1350
+ " warnings.warn(\n"
1351
+ ]
1352
+ },
1353
+ {
1354
+ "name": "stdout",
1355
+ "output_type": "stream",
1356
+ "text": [
1357
+ "Running on local URL: http://127.0.0.1:7901\n",
1358
+ "\n",
1359
+ "To create a public link, set `share=True` in `launch()`.\n"
1360
+ ]
1361
+ },
1362
+ {
1363
+ "data": {
1364
+ "text/html": [
1365
+ "<div><iframe src=\"http://127.0.0.1:7901/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
1366
+ ],
1367
+ "text/plain": [
1368
+ "<IPython.core.display.HTML object>"
1369
+ ]
1370
+ },
1371
+ "metadata": {},
1372
+ "output_type": "display_data"
1373
+ },
1374
+ {
1375
+ "data": {
1376
+ "text/plain": []
1377
+ },
1378
+ "execution_count": 51,
1379
+ "metadata": {},
1380
+ "output_type": "execute_result"
1381
+ },
1382
+ {
1383
+ "name": "stdout",
1384
+ "output_type": "stream",
1385
+ "text": [
1386
+ "sending request\n",
1387
+ "{\"id\":\"cmpl-734OdouEI70awj0YgzW2v9fLQCusE\",\"object\":\"text_completion\",\"created\":1680966239,\"model\":\"text-davinci-003\",\"choices\":[{\"text\":\"\\n Paragraph 1: \\n There appears to be a large focus on the pre-seed stage in this list of startups. AgeRate, Klogene Therapeutics, Inc., Modern Age, Age Labs, Assured Allies, Spring Discovery, AgeNation, and Elevian are all pre-seed startups that provide a variety of services related to aging. AgeRate and Age Labs are focused on providing accurate and affordable epigenetic tests to reveal a person’s biological age. Klogene Therapeutics, Inc. is focused on prevention and treatment of age-related diseases. Modern Age is a health and wellness platform designed to make the journey of aging more manageable. Assured Allies is a company focused on successful aging. Spring Discovery is focused on drug discovery for age-related diseases, and AgeNation is a digital media company for baby boomers and seniors. Lastly, Elevian is a molecular diagnostics company that seeks to reduce the cost of developing drugs by accurately measuring biological age.\\n \\n Paragraph 2:\\n In addition, there is one startup in the C stage, Youth Laboratories, which is focused on machine vision and artificial intelligence for beauty and healthy longevity. This company specializes in processing digital images and videos to develop diseases biomarkers and to evaluate the physical well-being of a human and lifestyle. By harnessing the power of artificial intelligence, they seek to slow down or even reverse age-associated changes, particularly those related to skin. \\n \\n Paragraph 3:\\n Taken together, these startups provide a comprehensive range of services related to aging, from molecular diagnostics and drug discovery to digital media and artificial intelligence. They offer both prevention and treatment of age-related diseases, as well as ways to measure and manage the aging process. 
With the potential for these companies to make a positive impact on the aging process, it is likely that competition between them will increase in the future.\",\"index\":0,\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":482,\"completion_tokens\":392,\"total_tokens\":874}}\n",
1388
+ "\n"
1389
+ ]
1390
+ }
1391
+ ],
1392
+ "source": [
1393
+ "#the first module becomes text1, the second module file1\n",
1394
+ "def vector_search(size, target, stage, query, var_metadata, var_fresh): #greet('11-500+', 'B2B', 'pre-seed', 'age-reversing')\n",
1395
+ " def raised_zero(x):\n",
1396
+ " if x == 0:\n",
1397
+ " return 'Undisclosed'\n",
1398
+ " else:\n",
1399
+ " return x\n",
1400
+ " df_knn = search(df, query)\n",
1401
+ " df_knn['raised'] = df_knn['raised'].apply(lambda x : raised_zero(x))\n",
1402
+ "\n",
1403
+ " df_size = filter_df(df_knn, 'size', '==', size, 1)\n",
1404
+ "\n",
1405
+ " if stage != 'ALL':\n",
1406
+ " df_stage = filter_df(df_size, 'stage', '==', stage.lower(), 1)\n",
1407
+ " else:\n",
1408
+ " #we bypass the filter\n",
1409
+ " df_stage = df_size\n",
1410
+ "\n",
1411
+ " df_target = filter_df(df_stage, 'target', 'contains', target, 1)\n",
1412
+ " \n",
1413
+ " # display(df_stage)\n",
1414
+ " # df_raised = df_target[(df_target['raised'] >= raised) | (df_target['raised'] == 0)]\n",
1415
+ "\n",
1416
+ " return df_target.drop('text_vector_', axis=1)[0:100], df_target[0:100], True #.sort_values('raised', ascending=False)\n",
1417
+ "\n",
1418
+ "def write_competitor_analysis(var_metadata, query, var_fresh):\n",
1419
+ "\n",
1420
+ " if var_fresh == True:\n",
1421
+ " df_final = score_filter(var_metadata, query, 0.35)\n",
1422
+ " df_final = df_final[['name', 'stage', 'description']][0:10].values.tolist()\n",
1423
+ "\n",
1424
+ " if len(df_final) == 0:\n",
1425
+ " # df_final = df_final[['name', 'stage', 'description']][0:3].values.tolist()\n",
1426
+ " # response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=1)\n",
1427
+ " response = 'score too low to output valid results'\n",
1428
+ " if len(df_final) >= 1 and len(df_final) <= 3:\n",
1429
+ " response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=1)\n",
1430
+ " elif len(df_final) > 3 and len(df_final) <= 5:\n",
1431
+ " response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=2)\n",
1432
+ " elif len(df_final) > 5:\n",
1433
+ " response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=3)\n",
1434
+ "\n",
1435
+ " return response, False #we reset fresh state\n",
1436
+ "\n",
1437
+ " else:\n",
1438
+ " return 'Perform a new Startup Search first', False #we reset fresh state\n",
1439
+ "\n",
1440
+ "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n",
1441
+ " gr.Markdown(\n",
1442
+ " \"\"\"\n",
1443
+ " # Startup Search Engine\n",
1444
+ " \"\"\"\n",
1445
+ " )\n",
1446
+ " var_fresh = gr.Variable(value=False)\n",
1447
+ " var_metadata = gr.Variable(value=0)\n",
1448
+ " var_query = gr.Variable(value=0)\n",
1449
+ " size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+', '11-500+'], multiselect=False, value='11-500+', label='size')\n",
1450
+ " target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], multiselect=False, value='B2B', label='target')\n",
1451
+ " stage = gr.Radio(['pre-seed', 'A', 'B', 'C', 'ALL'], multiselect=False, value='ALL', label='stage')\n",
1452
+ " # raised = gr.Slider(0, 20, value=5, step_size=1, label=\"Minimum raising (in Millions)\")\n",
1453
+ " query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')\n",
1454
+ " # competitor_analysis = gr.Radio(['write', 'do not write'], multiselect=False, value='do not write', label='write a competitor analysis')\n",
1455
+ "\n",
1456
+ " btn2 = gr.Button(value=\"Search for a Startup\")\n",
1457
+ " btn1 = gr.Button(value=\"Write a competitor analysis\")\n",
1458
+ " \n",
1459
+ " output1 = gr.Textbox(label='competitor analysis')\n",
1460
+ " output2 = gr.DataFrame(label='value')\n",
1461
+ "\n",
1462
+ " btn1.click(write_competitor_analysis, [var_metadata, query, var_fresh], [output1, var_fresh]) #competitor analysis\n",
1463
+ " btn2.click(vector_search, [size, target, stage, query, var_metadata, var_fresh], [output2, var_metadata, var_fresh]) #startup search\n",
1464
+ "\n",
1465
+ "demo.launch(share=False)"
1466
+ ]
1467
+ }
1468
+ ],
1469
+ "metadata": {
1470
+ "kernelspec": {
1471
+ "display_name": "Python 3",
1472
+ "language": "python",
1473
+ "name": "python3"
1474
+ },
1475
+ "language_info": {
1476
+ "codemirror_mode": {
1477
+ "name": "ipython",
1478
+ "version": 3
1479
+ },
1480
+ "file_extension": ".py",
1481
+ "mimetype": "text/x-python",
1482
+ "name": "python",
1483
+ "nbconvert_exporter": "python",
1484
+ "pygments_lexer": "ipython3",
1485
+ "version": "3.9.13"
1486
+ },
1487
+ "orig_nbformat": 4,
1488
+ "vscode": {
1489
+ "interpreter": {
1490
+ "hash": "fdf377d643bc1cb065454f0ad2ceac75d834452ecf289e7ba92c6b3f59a7cee1"
1491
+ }
1492
+ }
1493
+ },
1494
+ "nbformat": 4,
1495
+ "nbformat_minor": 2
1496
+ }
data_manipulation.ipynb CHANGED
@@ -324,7 +324,7 @@
324
  },
325
  {
326
  "cell_type": "code",
327
- "execution_count": 3,
328
  "metadata": {},
329
  "outputs": [
330
  {
@@ -348,16 +348,17 @@
348
  " <thead>\n",
349
  " <tr style=\"text-align: right;\">\n",
350
  " <th></th>\n",
351
- " <th>title</th>\n",
352
  " <th>description</th>\n",
353
  " <th>stage</th>\n",
354
- " <th>industry_name</th>\n",
355
  " <th>url</th>\n",
356
- " <th>country_slug</th>\n",
357
- " <th>city_slug</th>\n",
358
- " <th>location</th>\n",
359
- " <th>region_name</th>\n",
360
  " <th>text_vector_</th>\n",
 
 
 
 
361
  " </tr>\n",
362
  " </thead>\n",
363
  " <tbody>\n",
@@ -365,66 +366,71 @@
365
  " <th>0</th>\n",
366
  " <td>Digipal</td>\n",
367
  " <td>Digipal is a digital consultancy based in Tbil...</td>\n",
368
- " <td>0.0</td>\n",
369
- " <td>Software &amp; Data</td>\n",
370
  " <td>https://www.digipal.agency/</td>\n",
371
  " <td>georgia</td>\n",
372
- " <td>tbilisi</td>\n",
373
- " <td>Tbilisi, Georgia</td>\n",
374
- " <td>Europe</td>\n",
375
  " <td>[0.017287444323301315, 0.06208805367350578, -0...</td>\n",
 
 
 
 
376
  " </tr>\n",
377
  " <tr>\n",
378
  " <th>1</th>\n",
379
  " <td>BeatBind</td>\n",
380
  " <td>BeatBind is the industry's long overdue platfo...</td>\n",
381
- " <td>0.0</td>\n",
382
- " <td>Social &amp; Leisure</td>\n",
383
  " <td>https://beatbind.io/</td>\n",
384
  " <td>georgia</td>\n",
385
- " <td>tbilisi</td>\n",
386
- " <td>Tbilisi, Georgia</td>\n",
387
- " <td>Europe</td>\n",
388
  " <td>[-0.00438214186578989, -0.051213208585977554, ...</td>\n",
 
 
 
 
389
  " </tr>\n",
390
  " <tr>\n",
391
  " <th>2</th>\n",
392
  " <td>Smart Academy</td>\n",
393
  " <td>Smart Academy is a modern educational institut...</td>\n",
394
- " <td>0.0</td>\n",
395
- " <td>Edtech</td>\n",
396
  " <td>https://smartacademy.ge/</td>\n",
397
  " <td>georgia</td>\n",
398
- " <td>tbilisi</td>\n",
399
- " <td>Tbilisi, Georgia</td>\n",
400
- " <td>Europe</td>\n",
401
  " <td>[0.0005468669114634395, -0.05331585183739662, ...</td>\n",
 
 
 
 
402
  " </tr>\n",
403
  " <tr>\n",
404
  " <th>3</th>\n",
405
  " <td>MaxinAI</td>\n",
406
  " <td>MaxinAI isglobal AI development company that w...</td>\n",
407
- " <td>0.0</td>\n",
408
- " <td>Software &amp; Data</td>\n",
409
  " <td>https://www.maxinai.com/#all-industries</td>\n",
410
  " <td>georgia</td>\n",
411
- " <td>tbilisi</td>\n",
412
- " <td>Tbilisi, Georgia</td>\n",
413
- " <td>Europe</td>\n",
414
  " <td>[0.021948501467704773, 0.024166792631149292, -...</td>\n",
 
 
 
 
415
  " </tr>\n",
416
  " <tr>\n",
417
  " <th>4</th>\n",
418
  " <td>TLANCER</td>\n",
419
  " <td>Tlancer aims to create an unlimited educationa...</td>\n",
420
- " <td>0.0</td>\n",
421
- " <td>Edtech</td>\n",
422
  " <td>https://www.tlancer.ge/</td>\n",
423
  " <td>georgia</td>\n",
424
- " <td>tbilisi</td>\n",
425
- " <td>Tbilisi, Georgia</td>\n",
426
- " <td>Europe</td>\n",
427
  " <td>[0.02025573141872883, -0.022812215611338615, -...</td>\n",
 
 
 
 
428
  " </tr>\n",
429
  " <tr>\n",
430
  " <th>...</th>\n",
@@ -438,79 +444,85 @@
438
  " <td>...</td>\n",
439
  " <td>...</td>\n",
440
  " <td>...</td>\n",
 
441
  " </tr>\n",
442
  " <tr>\n",
443
  " <th>94521</th>\n",
444
  " <td>OneTwo</td>\n",
445
  " <td>klkdčksč kdč skdčlsk čdksčd ksčk dčskdčk čdk</td>\n",
446
- " <td>0.0</td>\n",
447
- " <td>Software &amp; Data</td>\n",
448
  " <td>www.nethr</td>\n",
449
  " <td>croatia</td>\n",
450
- " <td>zagreb</td>\n",
451
- " <td>Zagreb, Croatia</td>\n",
452
- " <td>Europe</td>\n",
453
  " <td>[0.07235302031040192, -0.05674564838409424, -0...</td>\n",
 
 
 
 
454
  " </tr>\n",
455
  " <tr>\n",
456
  " <th>94522</th>\n",
457
  " <td>Trialfire</td>\n",
458
  " <td>Engaged trialers turn into customers, engaged ...</td>\n",
459
- " <td>0.0</td>\n",
460
- " <td>Software &amp; Data</td>\n",
461
  " <td>http://www.trialfire.com</td>\n",
462
  " <td>canada</td>\n",
463
- " <td>toronto</td>\n",
464
- " <td>Toronto, Canada</td>\n",
465
- " <td>North America</td>\n",
466
  " <td>[0.030764097347855568, 0.054082825779914856, -...</td>\n",
 
 
 
 
467
  " </tr>\n",
468
  " <tr>\n",
469
  " <th>94523</th>\n",
470
  " <td>ILLUMAGEAR</td>\n",
471
  " <td>ILLUMAGEAR’s mission is to illuminate people a...</td>\n",
472
- " <td>0.0</td>\n",
473
- " <td>Software &amp; Data</td>\n",
474
  " <td>http://www.illumagear.com</td>\n",
475
  " <td>united-states</td>\n",
476
- " <td>seattle</td>\n",
477
- " <td>Seattle, United States</td>\n",
478
- " <td>North America</td>\n",
479
  " <td>[0.015447210520505905, -0.0984775498509407, 0....</td>\n",
 
 
 
 
480
  " </tr>\n",
481
  " <tr>\n",
482
  " <th>94524</th>\n",
483
  " <td>Knowillage</td>\n",
484
  " <td>Knowillage lets you add personalization to you...</td>\n",
485
- " <td>0.0</td>\n",
486
- " <td>Edtech</td>\n",
487
  " <td>http://www.knowillage.com</td>\n",
488
  " <td>canada</td>\n",
489
- " <td>vancouver</td>\n",
490
- " <td>Vancouver, Canada</td>\n",
491
- " <td>North America</td>\n",
492
  " <td>[0.007970919832587242, -0.04347420111298561, -...</td>\n",
 
 
 
 
493
  " </tr>\n",
494
  " <tr>\n",
495
  " <th>94525</th>\n",
496
  " <td>Iris Holidays</td>\n",
497
  " <td>Iris Holidays is a full service Kerala tours o...</td>\n",
498
- " <td>0.0</td>\n",
499
- " <td>Software &amp; Data</td>\n",
500
  " <td>http://www.irisholidays.com</td>\n",
501
  " <td>india</td>\n",
502
- " <td>kochi</td>\n",
503
- " <td>Kochi, India</td>\n",
504
- " <td>Asia Pacific</td>\n",
505
  " <td>[0.0032976483926177025, -0.010843133553862572,...</td>\n",
 
 
 
 
506
  " </tr>\n",
507
  " </tbody>\n",
508
  "</table>\n",
509
- "<p>94526 rows × 10 columns</p>\n",
510
  "</div>"
511
  ],
512
  "text/plain": [
513
- " title description \\\n",
514
  "0 Digipal Digipal is a digital consultancy based in Tbil... \n",
515
  "1 BeatBind BeatBind is the industry's long overdue platfo... \n",
516
  "2 Smart Academy Smart Academy is a modern educational institut... \n",
@@ -523,49 +535,49 @@
523
  "94524 Knowillage Knowillage lets you add personalization to you... \n",
524
  "94525 Iris Holidays Iris Holidays is a full service Kerala tours o... \n",
525
  "\n",
526
- " stage industry_name url \\\n",
527
- "0 0.0 Software & Data https://www.digipal.agency/ \n",
528
- "1 0.0 Social & Leisure https://beatbind.io/ \n",
529
- "2 0.0 Edtech https://smartacademy.ge/ \n",
530
- "3 0.0 Software & Data https://www.maxinai.com/#all-industries \n",
531
- "4 0.0 Edtech https://www.tlancer.ge/ \n",
532
- "... ... ... ... \n",
533
- "94521 0.0 Software & Data www.nethr \n",
534
- "94522 0.0 Software & Data http://www.trialfire.com \n",
535
- "94523 0.0 Software & Data http://www.illumagear.com \n",
536
- "94524 0.0 Edtech http://www.knowillage.com \n",
537
- "94525 0.0 Software & Data http://www.irisholidays.com \n",
538
  "\n",
539
- " country_slug city_slug location region_name \\\n",
540
- "0 georgia tbilisi Tbilisi, Georgia Europe \n",
541
- "1 georgia tbilisi Tbilisi, Georgia Europe \n",
542
- "2 georgia tbilisi Tbilisi, Georgia Europe \n",
543
- "3 georgia tbilisi Tbilisi, Georgia Europe \n",
544
- "4 georgia tbilisi Tbilisi, Georgia Europe \n",
545
- "... ... ... ... ... \n",
546
- "94521 croatia zagreb Zagreb, Croatia Europe \n",
547
- "94522 canada toronto Toronto, Canada North America \n",
548
- "94523 united-states seattle Seattle, United States North America \n",
549
- "94524 canada vancouver Vancouver, Canada North America \n",
550
- "94525 india kochi Kochi, India Asia Pacific \n",
551
  "\n",
552
- " text_vector_ \n",
553
- "0 [0.017287444323301315, 0.06208805367350578, -0... \n",
554
- "1 [-0.00438214186578989, -0.051213208585977554, ... \n",
555
- "2 [0.0005468669114634395, -0.05331585183739662, ... \n",
556
- "3 [0.021948501467704773, 0.024166792631149292, -... \n",
557
- "4 [0.02025573141872883, -0.022812215611338615, -... \n",
558
- "... ... \n",
559
- "94521 [0.07235302031040192, -0.05674564838409424, -0... \n",
560
- "94522 [0.030764097347855568, 0.054082825779914856, -... \n",
561
- "94523 [0.015447210520505905, -0.0984775498509407, 0.... \n",
562
- "94524 [0.007970919832587242, -0.04347420111298561, -... \n",
563
- "94525 [0.0032976483926177025, -0.010843133553862572,... \n",
564
  "\n",
565
- "[94526 rows x 10 columns]"
566
  ]
567
  },
568
- "execution_count": 3,
569
  "metadata": {},
570
  "output_type": "execute_result"
571
  }
@@ -581,12 +593,525 @@
581
  "}\n",
582
  "\n",
583
  "df2 = pd.read_parquet('df_encoded2.parquet')\n",
584
- "df2.columns = [['name', 'description', 'stage', 'industry_name', 'url', 'country_slug', 'text_vector_']]\n",
585
  "df2['stage'] = df2['stage'].apply(lambda x : stage_dict[x])\n",
586
  "df2['raised'] = 'Undisclosed'\n",
587
  "df2['size'] = '11-500+'\n",
588
  "df2['source'] = 'https://www.startupblink.com'\n",
589
- "df2.columns = [['name', 'description', 'stage', 'tags', 'url', 'country_slug', 'text_vector_', 'raised', 'size', 'source']]"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
590
  ]
591
  },
592
  {
 
324
  },
325
  {
326
  "cell_type": "code",
327
+ "execution_count": 91,
328
  "metadata": {},
329
  "outputs": [
330
  {
 
348
  " <thead>\n",
349
  " <tr style=\"text-align: right;\">\n",
350
  " <th></th>\n",
351
+ " <th>name</th>\n",
352
  " <th>description</th>\n",
353
  " <th>stage</th>\n",
354
+ " <th>tags</th>\n",
355
  " <th>url</th>\n",
356
+ " <th>country</th>\n",
 
 
 
357
  " <th>text_vector_</th>\n",
358
+ " <th>raised</th>\n",
359
+ " <th>size</th>\n",
360
+ " <th>source</th>\n",
361
+ " <th>target</th>\n",
362
  " </tr>\n",
363
  " </thead>\n",
364
  " <tbody>\n",
 
366
  " <th>0</th>\n",
367
  " <td>Digipal</td>\n",
368
  " <td>Digipal is a digital consultancy based in Tbil...</td>\n",
369
+ " <td>pre-seed</td>\n",
370
+ " <td>[software, data]</td>\n",
371
  " <td>https://www.digipal.agency/</td>\n",
372
  " <td>georgia</td>\n",
 
 
 
373
  " <td>[0.017287444323301315, 0.06208805367350578, -0...</td>\n",
374
+ " <td>Undisclosed</td>\n",
375
+ " <td>11-500+</td>\n",
376
+ " <td>https://www.startupblink.com</td>\n",
377
+ " <td>Undisclosed</td>\n",
378
  " </tr>\n",
379
  " <tr>\n",
380
  " <th>1</th>\n",
381
  " <td>BeatBind</td>\n",
382
  " <td>BeatBind is the industry's long overdue platfo...</td>\n",
383
+ " <td>pre-seed</td>\n",
384
+ " <td>[social, leisure]</td>\n",
385
  " <td>https://beatbind.io/</td>\n",
386
  " <td>georgia</td>\n",
 
 
 
387
  " <td>[-0.00438214186578989, -0.051213208585977554, ...</td>\n",
388
+ " <td>Undisclosed</td>\n",
389
+ " <td>11-500+</td>\n",
390
+ " <td>https://www.startupblink.com</td>\n",
391
+ " <td>Undisclosed</td>\n",
392
  " </tr>\n",
393
  " <tr>\n",
394
  " <th>2</th>\n",
395
  " <td>Smart Academy</td>\n",
396
  " <td>Smart Academy is a modern educational institut...</td>\n",
397
+ " <td>pre-seed</td>\n",
398
+ " <td>[edtech]</td>\n",
399
  " <td>https://smartacademy.ge/</td>\n",
400
  " <td>georgia</td>\n",
 
 
 
401
  " <td>[0.0005468669114634395, -0.05331585183739662, ...</td>\n",
402
+ " <td>Undisclosed</td>\n",
403
+ " <td>11-500+</td>\n",
404
+ " <td>https://www.startupblink.com</td>\n",
405
+ " <td>Undisclosed</td>\n",
406
  " </tr>\n",
407
  " <tr>\n",
408
  " <th>3</th>\n",
409
  " <td>MaxinAI</td>\n",
410
  " <td>MaxinAI isglobal AI development company that w...</td>\n",
411
+ " <td>pre-seed</td>\n",
412
+ " <td>[software, data]</td>\n",
413
  " <td>https://www.maxinai.com/#all-industries</td>\n",
414
  " <td>georgia</td>\n",
 
 
 
415
  " <td>[0.021948501467704773, 0.024166792631149292, -...</td>\n",
416
+ " <td>Undisclosed</td>\n",
417
+ " <td>11-500+</td>\n",
418
+ " <td>https://www.startupblink.com</td>\n",
419
+ " <td>Undisclosed</td>\n",
420
  " </tr>\n",
421
  " <tr>\n",
422
  " <th>4</th>\n",
423
  " <td>TLANCER</td>\n",
424
  " <td>Tlancer aims to create an unlimited educationa...</td>\n",
425
+ " <td>pre-seed</td>\n",
426
+ " <td>[edtech]</td>\n",
427
  " <td>https://www.tlancer.ge/</td>\n",
428
  " <td>georgia</td>\n",
 
 
 
429
  " <td>[0.02025573141872883, -0.022812215611338615, -...</td>\n",
430
+ " <td>Undisclosed</td>\n",
431
+ " <td>11-500+</td>\n",
432
+ " <td>https://www.startupblink.com</td>\n",
433
+ " <td>Undisclosed</td>\n",
434
  " </tr>\n",
435
  " <tr>\n",
436
  " <th>...</th>\n",
 
444
  " <td>...</td>\n",
445
  " <td>...</td>\n",
446
  " <td>...</td>\n",
447
+ " <td>...</td>\n",
448
  " </tr>\n",
449
  " <tr>\n",
450
  " <th>94521</th>\n",
451
  " <td>OneTwo</td>\n",
452
  " <td>klkdčksč kdč skdčlsk čdksčd ksčk dčskdčk čdk</td>\n",
453
+ " <td>pre-seed</td>\n",
454
+ " <td>[software, data]</td>\n",
455
  " <td>www.nethr</td>\n",
456
  " <td>croatia</td>\n",
 
 
 
457
  " <td>[0.07235302031040192, -0.05674564838409424, -0...</td>\n",
458
+ " <td>Undisclosed</td>\n",
459
+ " <td>11-500+</td>\n",
460
+ " <td>https://www.startupblink.com</td>\n",
461
+ " <td>Undisclosed</td>\n",
462
  " </tr>\n",
463
  " <tr>\n",
464
  " <th>94522</th>\n",
465
  " <td>Trialfire</td>\n",
466
  " <td>Engaged trialers turn into customers, engaged ...</td>\n",
467
+ " <td>pre-seed</td>\n",
468
+ " <td>[software, data]</td>\n",
469
  " <td>http://www.trialfire.com</td>\n",
470
  " <td>canada</td>\n",
 
 
 
471
  " <td>[0.030764097347855568, 0.054082825779914856, -...</td>\n",
472
+ " <td>Undisclosed</td>\n",
473
+ " <td>11-500+</td>\n",
474
+ " <td>https://www.startupblink.com</td>\n",
475
+ " <td>Undisclosed</td>\n",
476
  " </tr>\n",
477
  " <tr>\n",
478
  " <th>94523</th>\n",
479
  " <td>ILLUMAGEAR</td>\n",
480
  " <td>ILLUMAGEAR’s mission is to illuminate people a...</td>\n",
481
+ " <td>pre-seed</td>\n",
482
+ " <td>[software, data]</td>\n",
483
  " <td>http://www.illumagear.com</td>\n",
484
  " <td>united-states</td>\n",
 
 
 
485
  " <td>[0.015447210520505905, -0.0984775498509407, 0....</td>\n",
486
+ " <td>Undisclosed</td>\n",
487
+ " <td>11-500+</td>\n",
488
+ " <td>https://www.startupblink.com</td>\n",
489
+ " <td>Undisclosed</td>\n",
490
  " </tr>\n",
491
  " <tr>\n",
492
  " <th>94524</th>\n",
493
  " <td>Knowillage</td>\n",
494
  " <td>Knowillage lets you add personalization to you...</td>\n",
495
+ " <td>pre-seed</td>\n",
496
+ " <td>[edtech]</td>\n",
497
  " <td>http://www.knowillage.com</td>\n",
498
  " <td>canada</td>\n",
 
 
 
499
  " <td>[0.007970919832587242, -0.04347420111298561, -...</td>\n",
500
+ " <td>Undisclosed</td>\n",
501
+ " <td>11-500+</td>\n",
502
+ " <td>https://www.startupblink.com</td>\n",
503
+ " <td>Undisclosed</td>\n",
504
  " </tr>\n",
505
  " <tr>\n",
506
  " <th>94525</th>\n",
507
  " <td>Iris Holidays</td>\n",
508
  " <td>Iris Holidays is a full service Kerala tours o...</td>\n",
509
+ " <td>pre-seed</td>\n",
510
+ " <td>[software, data]</td>\n",
511
  " <td>http://www.irisholidays.com</td>\n",
512
  " <td>india</td>\n",
 
 
 
513
  " <td>[0.0032976483926177025, -0.010843133553862572,...</td>\n",
514
+ " <td>Undisclosed</td>\n",
515
+ " <td>11-500+</td>\n",
516
+ " <td>https://www.startupblink.com</td>\n",
517
+ " <td>Undisclosed</td>\n",
518
  " </tr>\n",
519
  " </tbody>\n",
520
  "</table>\n",
521
+ "<p>94526 rows × 11 columns</p>\n",
522
  "</div>"
523
  ],
524
  "text/plain": [
525
+ " name description \\\n",
526
  "0 Digipal Digipal is a digital consultancy based in Tbil... \n",
527
  "1 BeatBind BeatBind is the industry's long overdue platfo... \n",
528
  "2 Smart Academy Smart Academy is a modern educational institut... \n",
 
535
  "94524 Knowillage Knowillage lets you add personalization to you... \n",
536
  "94525 Iris Holidays Iris Holidays is a full service Kerala tours o... \n",
537
  "\n",
538
+ " stage tags url \\\n",
539
+ "0 pre-seed [software, data] https://www.digipal.agency/ \n",
540
+ "1 pre-seed [social, leisure] https://beatbind.io/ \n",
541
+ "2 pre-seed [edtech] https://smartacademy.ge/ \n",
542
+ "3 pre-seed [software, data] https://www.maxinai.com/#all-industries \n",
543
+ "4 pre-seed [edtech] https://www.tlancer.ge/ \n",
544
+ "... ... ... ... \n",
545
+ "94521 pre-seed [software, data] www.nethr \n",
546
+ "94522 pre-seed [software, data] http://www.trialfire.com \n",
547
+ "94523 pre-seed [software, data] http://www.illumagear.com \n",
548
+ "94524 pre-seed [edtech] http://www.knowillage.com \n",
549
+ "94525 pre-seed [software, data] http://www.irisholidays.com \n",
550
  "\n",
551
+ " country text_vector_ \\\n",
552
+ "0 georgia [0.017287444323301315, 0.06208805367350578, -0... \n",
553
+ "1 georgia [-0.00438214186578989, -0.051213208585977554, ... \n",
554
+ "2 georgia [0.0005468669114634395, -0.05331585183739662, ... \n",
555
+ "3 georgia [0.021948501467704773, 0.024166792631149292, -... \n",
556
+ "4 georgia [0.02025573141872883, -0.022812215611338615, -... \n",
557
+ "... ... ... \n",
558
+ "94521 croatia [0.07235302031040192, -0.05674564838409424, -0... \n",
559
+ "94522 canada [0.030764097347855568, 0.054082825779914856, -... \n",
560
+ "94523 united-states [0.015447210520505905, -0.0984775498509407, 0.... \n",
561
+ "94524 canada [0.007970919832587242, -0.04347420111298561, -... \n",
562
+ "94525 india [0.0032976483926177025, -0.010843133553862572,... \n",
563
  "\n",
564
+ " raised size source target \n",
565
+ "0 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
566
+ "1 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
567
+ "2 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
568
+ "3 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
569
+ "4 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
570
+ "... ... ... ... ... \n",
571
+ "94521 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
572
+ "94522 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
573
+ "94523 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
574
+ "94524 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
575
+ "94525 Undisclosed 11-500+ https://www.startupblink.com Undisclosed \n",
576
  "\n",
577
+ "[94526 rows x 11 columns]"
578
  ]
579
  },
580
+ "execution_count": 91,
581
  "metadata": {},
582
  "output_type": "execute_result"
583
  }
 
593
  "}\n",
594
  "\n",
595
  "df2 = pd.read_parquet('df_encoded2.parquet')\n",
596
+ "df2 = df2[['title', 'description', 'stage', 'industry_name', 'url', 'country_slug', 'text_vector_']]\n",
597
  "df2['stage'] = df2['stage'].apply(lambda x : stage_dict[x])\n",
598
  "df2['raised'] = 'Undisclosed'\n",
599
  "df2['size'] = '11-500+'\n",
600
  "df2['source'] = 'https://www.startupblink.com'\n",
601
+ "df2['target'] = 'Undisclosed'\n",
602
+ "df2['industry_name']= df2['industry_name'].apply(lambda x : [a.strip().lower() for a in x.split('&')])\n",
603
+ "df2.columns = ['name', 'description', 'stage', 'tags', 'url', 'country', 'text_vector_', 'raised', 'size', 'source', 'target']\n",
604
+ "df2"
605
+ ]
606
+ },
607
+ {
608
+ "cell_type": "code",
609
+ "execution_count": 94,
610
+ "metadata": {},
611
+ "outputs": [],
612
+ "source": [
613
+ "# Stack both sources row-wise, fill the NaN gaps left by columns one source lacks with '', and drop 'year'.\n",
+ "df3 = pd.concat([df1, df2], ignore_index=True, axis=0).fillna('').drop(['year'], axis=1)\n",
614
+ "# Guard against misaligned columns before persisting: a text_vector_ entry must never be a string.\n",
+ "assert not df3['text_vector_'].map(lambda v: isinstance(v, str)).any(), 'text_vector_ contains strings; check column alignment'\n",
615
+ "df3.to_parquet('df_encoded3.parquet')"
616
+ ]
617
+ },
618
+ {
619
+ "cell_type": "code",
620
+ "execution_count": 88,
621
+ "metadata": {},
622
+ "outputs": [
623
+ {
624
+ "data": {
625
+ "text/html": [
626
+ "<div>\n",
627
+ "<style scoped>\n",
628
+ " .dataframe tbody tr th:only-of-type {\n",
629
+ " vertical-align: middle;\n",
630
+ " }\n",
631
+ "\n",
632
+ " .dataframe tbody tr th {\n",
633
+ " vertical-align: top;\n",
634
+ " }\n",
635
+ "\n",
636
+ " .dataframe thead th {\n",
637
+ " text-align: right;\n",
638
+ " }\n",
639
+ "</style>\n",
640
+ "<table border=\"1\" class=\"dataframe\">\n",
641
+ " <thead>\n",
642
+ " <tr style=\"text-align: right;\">\n",
643
+ " <th></th>\n",
644
+ " <th>name</th>\n",
645
+ " <th>description</th>\n",
646
+ " <th>target</th>\n",
647
+ " <th>size</th>\n",
648
+ " <th>stage</th>\n",
649
+ " <th>raised</th>\n",
650
+ " <th>tags</th>\n",
651
+ " <th>country</th>\n",
652
+ " <th>source</th>\n",
653
+ " <th>text_vector_</th>\n",
654
+ " <th>url</th>\n",
655
+ " </tr>\n",
656
+ " </thead>\n",
657
+ " <tbody>\n",
658
+ " <tr>\n",
659
+ " <th>0</th>\n",
660
+ " <td>0.10 of a Second</td>\n",
661
+ " <td>Smart Indicators for Connected Vehicles</td>\n",
662
+ " <td>B2B</td>\n",
663
+ " <td>1-10</td>\n",
664
+ " <td>Pre-Funding</td>\n",
665
+ " <td>Undisclosed</td>\n",
666
+ " <td>[connected-vehicles, adas, autonomous-vehicles...</td>\n",
667
+ " <td>Israel</td>\n",
668
+ " <td>https://finder.startupnationcentral.org/</td>\n",
669
+ " <td>[-0.031224824488162994, -0.06342269480228424, ...</td>\n",
670
+ " <td></td>\n",
671
+ " </tr>\n",
672
+ " <tr>\n",
673
+ " <th>1</th>\n",
674
+ " <td>12trix</td>\n",
675
+ " <td>Math Lessons for Young Kids</td>\n",
676
+ " <td>B2B, B2C</td>\n",
677
+ " <td>1-10</td>\n",
678
+ " <td>Pre-Funding</td>\n",
679
+ " <td>Undisclosed</td>\n",
680
+ " <td>[sdg, schools, pre-k, serious-games, games, mo...</td>\n",
681
+ " <td>Israel</td>\n",
682
+ " <td>https://finder.startupnationcentral.org/</td>\n",
683
+ " <td>[-0.038649097084999084, 0.028091922402381897, ...</td>\n",
684
+ " <td></td>\n",
685
+ " </tr>\n",
686
+ " <tr>\n",
687
+ " <th>2</th>\n",
688
+ " <td>1E Therapeutics</td>\n",
689
+ " <td>Novel RNA-targeting Drugs</td>\n",
690
+ " <td>B2B</td>\n",
691
+ " <td>51-200</td>\n",
692
+ " <td>Seed</td>\n",
693
+ " <td>$120M</td>\n",
694
+ " <td>[pharmaceuticals, chronic-disease, immunology,...</td>\n",
695
+ " <td>Israel</td>\n",
696
+ " <td>https://finder.startupnationcentral.org/</td>\n",
697
+ " <td>[0.04561534896492958, -0.017776092514395714, 0...</td>\n",
698
+ " <td></td>\n",
699
+ " </tr>\n",
700
+ " <tr>\n",
701
+ " <th>3</th>\n",
702
+ " <td>1MRobotics</td>\n",
703
+ " <td>Retail Automation Solutions with Nano Fulfillment</td>\n",
704
+ " <td>B2B</td>\n",
705
+ " <td>11-50</td>\n",
706
+ " <td>A</td>\n",
707
+ " <td>$25M</td>\n",
708
+ " <td>[omni-channel, ecommerce, climate-tech, artifi...</td>\n",
709
+ " <td>Israel</td>\n",
710
+ " <td>https://finder.startupnationcentral.org/</td>\n",
711
+ " <td>[0.0024080690927803516, -0.03042100928723812, ...</td>\n",
712
+ " <td></td>\n",
713
+ " </tr>\n",
714
+ " <tr>\n",
715
+ " <th>4</th>\n",
716
+ " <td>1touch.io</td>\n",
717
+ " <td>Personal Data Flow Tracking and Data Cataloging</td>\n",
718
+ " <td>B2B</td>\n",
719
+ " <td>51-200</td>\n",
720
+ " <td>A</td>\n",
721
+ " <td>$16.1M</td>\n",
722
+ " <td>[enterprise-solutions, data-protection, cyber-...</td>\n",
723
+ " <td>Israel</td>\n",
724
+ " <td>https://finder.startupnationcentral.org/</td>\n",
725
+ " <td>[-0.01007091999053955, 0.10431888699531555, -0...</td>\n",
726
+ " <td></td>\n",
727
+ " </tr>\n",
728
+ " <tr>\n",
729
+ " <th>5</th>\n",
730
+ " <td>24me</td>\n",
731
+ " <td>Next-generation Personal Assistant</td>\n",
732
+ " <td>B2C</td>\n",
733
+ " <td>1-10</td>\n",
734
+ " <td>Seed</td>\n",
735
+ " <td>Undisclosed</td>\n",
736
+ " <td>[time-management, scheduling, calendars, artif...</td>\n",
737
+ " <td>Israel</td>\n",
738
+ " <td>https://finder.startupnationcentral.org/</td>\n",
739
+ " <td>[0.035849399864673615, 0.04990792274475098, -0...</td>\n",
740
+ " <td></td>\n",
741
+ " </tr>\n",
742
+ " <tr>\n",
743
+ " <th>6</th>\n",
744
+ " <td>270Surgical</td>\n",
745
+ " <td>Specialty Laparoscopic System for Wide Cavity ...</td>\n",
746
+ " <td>B2B, B2C</td>\n",
747
+ " <td>11-50</td>\n",
748
+ " <td>Pre-Funding</td>\n",
749
+ " <td>Undisclosed</td>\n",
750
+ " <td>[endoscopy, surgery, operating-rooms, optics, ...</td>\n",
751
+ " <td>Israel</td>\n",
752
+ " <td>https://finder.startupnationcentral.org/</td>\n",
753
+ " <td>[-0.00110541470348835, 0.011574415490031242, 0...</td>\n",
754
+ " <td></td>\n",
755
+ " </tr>\n",
756
+ " <tr>\n",
757
+ " <th>7</th>\n",
758
+ " <td>2bPrecise</td>\n",
759
+ " <td>Precision Medicine Solution</td>\n",
760
+ " <td>B2B</td>\n",
761
+ " <td>51-200</td>\n",
762
+ " <td>Mature</td>\n",
763
+ " <td>Undisclosed</td>\n",
764
+ " <td>[decision-making, predictive-analytics, cardio...</td>\n",
765
+ " <td>Israel</td>\n",
766
+ " <td>https://finder.startupnationcentral.org/</td>\n",
767
+ " <td>[0.01863308809697628, 0.03877090662717819, -0....</td>\n",
768
+ " <td></td>\n",
769
+ " </tr>\n",
770
+ " <tr>\n",
771
+ " <th>8</th>\n",
772
+ " <td>2breathe Technologies</td>\n",
773
+ " <td>Smart Device and Mobile App to Induce Sleep</td>\n",
774
+ " <td>B2C</td>\n",
775
+ " <td>1-10</td>\n",
776
+ " <td>Pre-Funding</td>\n",
777
+ " <td>Undisclosed</td>\n",
778
+ " <td>[monitoring, digital-healthcare, sleep-disorde...</td>\n",
779
+ " <td>Israel</td>\n",
780
+ " <td>https://finder.startupnationcentral.org/</td>\n",
781
+ " <td>[-0.03323083370923996, -0.006272533442825079, ...</td>\n",
782
+ " <td></td>\n",
783
+ " </tr>\n",
784
+ " <tr>\n",
785
+ " <th>9</th>\n",
786
+ " <td>2TeaM</td>\n",
787
+ " <td>Software Solutions for Financial Companies</td>\n",
788
+ " <td>B2B, B2B2C</td>\n",
789
+ " <td>11-50</td>\n",
790
+ " <td>Mature</td>\n",
791
+ " <td>Undisclosed</td>\n",
792
+ " <td>[marketing, insurance-companies, bank-infrastr...</td>\n",
793
+ " <td>Israel</td>\n",
794
+ " <td>https://finder.startupnationcentral.org/</td>\n",
795
+ " <td>[-0.0050485446117818356, 0.030337687581777573,...</td>\n",
796
+ " <td></td>\n",
797
+ " </tr>\n",
798
+ " <tr>\n",
799
+ " <th>10</th>\n",
800
+ " <td>Digipal</td>\n",
801
+ " <td>Digipal is a digital consultancy based in Tbil...</td>\n",
802
+ " <td>georgia</td>\n",
803
+ " <td>Undisclosed</td>\n",
804
+ " <td>pre-seed</td>\n",
805
+ " <td>11-500+</td>\n",
806
+ " <td>[software, data]</td>\n",
807
+ " <td>[0.017287444323301315, 0.06208805367350578, -0...</td>\n",
808
+ " <td>https://www.startupblink.com</td>\n",
809
+ " <td>Undisclosed</td>\n",
810
+ " <td>https://www.digipal.agency/</td>\n",
811
+ " </tr>\n",
812
+ " <tr>\n",
813
+ " <th>11</th>\n",
814
+ " <td>BeatBind</td>\n",
815
+ " <td>BeatBind is the industry's long overdue platfo...</td>\n",
816
+ " <td>georgia</td>\n",
817
+ " <td>Undisclosed</td>\n",
818
+ " <td>pre-seed</td>\n",
819
+ " <td>11-500+</td>\n",
820
+ " <td>[social, leisure]</td>\n",
821
+ " <td>[-0.00438214186578989, -0.051213208585977554, ...</td>\n",
822
+ " <td>https://www.startupblink.com</td>\n",
823
+ " <td>Undisclosed</td>\n",
824
+ " <td>https://beatbind.io/</td>\n",
825
+ " </tr>\n",
826
+ " <tr>\n",
827
+ " <th>12</th>\n",
828
+ " <td>Smart Academy</td>\n",
829
+ " <td>Smart Academy is a modern educational institut...</td>\n",
830
+ " <td>georgia</td>\n",
831
+ " <td>Undisclosed</td>\n",
832
+ " <td>pre-seed</td>\n",
833
+ " <td>11-500+</td>\n",
834
+ " <td>[edtech]</td>\n",
835
+ " <td>[0.0005468669114634395, -0.05331585183739662, ...</td>\n",
836
+ " <td>https://www.startupblink.com</td>\n",
837
+ " <td>Undisclosed</td>\n",
838
+ " <td>https://smartacademy.ge/</td>\n",
839
+ " </tr>\n",
840
+ " <tr>\n",
841
+ " <th>13</th>\n",
842
+ " <td>MaxinAI</td>\n",
843
+ " <td>MaxinAI isglobal AI development company that w...</td>\n",
844
+ " <td>georgia</td>\n",
845
+ " <td>Undisclosed</td>\n",
846
+ " <td>pre-seed</td>\n",
847
+ " <td>11-500+</td>\n",
848
+ " <td>[software, data]</td>\n",
849
+ " <td>[0.021948501467704773, 0.024166792631149292, -...</td>\n",
850
+ " <td>https://www.startupblink.com</td>\n",
851
+ " <td>Undisclosed</td>\n",
852
+ " <td>https://www.maxinai.com/#all-industries</td>\n",
853
+ " </tr>\n",
854
+ " <tr>\n",
855
+ " <th>14</th>\n",
856
+ " <td>TLANCER</td>\n",
857
+ " <td>Tlancer aims to create an unlimited educationa...</td>\n",
858
+ " <td>georgia</td>\n",
859
+ " <td>Undisclosed</td>\n",
860
+ " <td>pre-seed</td>\n",
861
+ " <td>11-500+</td>\n",
862
+ " <td>[edtech]</td>\n",
863
+ " <td>[0.02025573141872883, -0.022812215611338615, -...</td>\n",
864
+ " <td>https://www.startupblink.com</td>\n",
865
+ " <td>Undisclosed</td>\n",
866
+ " <td>https://www.tlancer.ge/</td>\n",
867
+ " </tr>\n",
868
+ " <tr>\n",
869
+ " <th>15</th>\n",
870
+ " <td>MyCoins.ge</td>\n",
871
+ " <td>MyCoins.ge is the biggest Crypto exchange plat...</td>\n",
872
+ " <td>georgia</td>\n",
873
+ " <td>Undisclosed</td>\n",
874
+ " <td>pre-seed</td>\n",
875
+ " <td>11-500+</td>\n",
876
+ " <td>[fintech]</td>\n",
877
+ " <td>[0.0306679829955101, -0.010290002450346947, -0...</td>\n",
878
+ " <td>https://www.startupblink.com</td>\n",
879
+ " <td>Undisclosed</td>\n",
880
+ " <td>https://www.mycoins.ge/index.php/main/welcome</td>\n",
881
+ " </tr>\n",
882
+ " <tr>\n",
883
+ " <th>16</th>\n",
884
+ " <td>ATL Tech</td>\n",
885
+ " <td>ATL Tech is a company that specialized in Info...</td>\n",
886
+ " <td>azerbaijan</td>\n",
887
+ " <td>Undisclosed</td>\n",
888
+ " <td>pre-seed</td>\n",
889
+ " <td>11-500+</td>\n",
890
+ " <td>[software, data]</td>\n",
891
+ " <td>[0.014148630201816559, -0.01890609972178936, -...</td>\n",
892
+ " <td>https://www.startupblink.com</td>\n",
893
+ " <td>Undisclosed</td>\n",
894
+ " <td>https://www.atltech.az/az</td>\n",
895
+ " </tr>\n",
896
+ " <tr>\n",
897
+ " <th>17</th>\n",
898
+ " <td>zypl.ai</td>\n",
899
+ " <td>zypl.ai’s strategy is to become the leading AI...</td>\n",
900
+ " <td>tajikistan</td>\n",
901
+ " <td>Undisclosed</td>\n",
902
+ " <td>pre-seed</td>\n",
903
+ " <td>11-500+</td>\n",
904
+ " <td>[software, data]</td>\n",
905
+ " <td>[0.001473484211601317, 0.008834785781800747, -...</td>\n",
906
+ " <td>https://www.startupblink.com</td>\n",
907
+ " <td>Undisclosed</td>\n",
908
+ " <td>https://zypl.ai/</td>\n",
909
+ " </tr>\n",
910
+ " <tr>\n",
911
+ " <th>18</th>\n",
912
+ " <td>botifi</td>\n",
913
+ " <td>botifi is a tool for a quick start of sales on...</td>\n",
914
+ " <td>uzbekistan</td>\n",
915
+ " <td>Undisclosed</td>\n",
916
+ " <td>pre-seed</td>\n",
917
+ " <td>11-500+</td>\n",
918
+ " <td>[software, data]</td>\n",
919
+ " <td>[0.017161941155791283, -0.015285761095583439, ...</td>\n",
920
+ " <td>https://www.startupblink.com</td>\n",
921
+ " <td>Undisclosed</td>\n",
922
+ " <td>https://botifi.me/en/</td>\n",
923
+ " </tr>\n",
924
+ " <tr>\n",
925
+ " <th>19</th>\n",
926
+ " <td>smartup</td>\n",
927
+ " <td>smartup develop software solutions for various...</td>\n",
928
+ " <td>uzbekistan</td>\n",
929
+ " <td>Undisclosed</td>\n",
930
+ " <td>pre-seed</td>\n",
931
+ " <td>11-500+</td>\n",
932
+ " <td>[software, data]</td>\n",
933
+ " <td>[0.00023191649233922362, -0.005923444870859384...</td>\n",
934
+ " <td>https://www.startupblink.com</td>\n",
935
+ " <td>Undisclosed</td>\n",
936
+ " <td>https://smartup.uz/en.html</td>\n",
937
+ " </tr>\n",
938
+ " </tbody>\n",
939
+ "</table>\n",
940
+ "</div>"
941
+ ],
942
+ "text/plain": [
943
+ " name description \\\n",
944
+ "0 0.10 of a Second Smart Indicators for Connected Vehicles \n",
945
+ "1 12trix Math Lessons for Young Kids \n",
946
+ "2 1E Therapeutics Novel RNA-targeting Drugs \n",
947
+ "3 1MRobotics Retail Automation Solutions with Nano Fulfillment \n",
948
+ "4 1touch.io Personal Data Flow Tracking and Data Cataloging \n",
949
+ "5 24me Next-generation Personal Assistant \n",
950
+ "6 270Surgical Specialty Laparoscopic System for Wide Cavity ... \n",
951
+ "7 2bPrecise Precision Medicine Solution \n",
952
+ "8 2breathe Technologies Smart Device and Mobile App to Induce Sleep \n",
953
+ "9 2TeaM Software Solutions for Financial Companies \n",
954
+ "10 Digipal Digipal is a digital consultancy based in Tbil... \n",
955
+ "11 BeatBind BeatBind is the industry's long overdue platfo... \n",
956
+ "12 Smart Academy Smart Academy is a modern educational institut... \n",
957
+ "13 MaxinAI MaxinAI isglobal AI development company that w... \n",
958
+ "14 TLANCER Tlancer aims to create an unlimited educationa... \n",
959
+ "15 MyCoins.ge MyCoins.ge is the biggest Crypto exchange plat... \n",
960
+ "16 ATL Tech ATL Tech is a company that specialized in Info... \n",
961
+ "17 zypl.ai zypl.ai’s strategy is to become the leading AI... \n",
962
+ "18 botifi botifi is a tool for a quick start of sales on... \n",
963
+ "19 smartup smartup develop software solutions for various... \n",
964
+ "\n",
965
+ " target size stage raised \\\n",
966
+ "0 B2B 1-10 Pre-Funding Undisclosed \n",
967
+ "1 B2B, B2C 1-10 Pre-Funding Undisclosed \n",
968
+ "2 B2B 51-200 Seed $120M \n",
969
+ "3 B2B 11-50 A $25M \n",
970
+ "4 B2B 51-200 A $16.1M \n",
971
+ "5 B2C 1-10 Seed Undisclosed \n",
972
+ "6 B2B, B2C 11-50 Pre-Funding Undisclosed \n",
973
+ "7 B2B 51-200 Mature Undisclosed \n",
974
+ "8 B2C 1-10 Pre-Funding Undisclosed \n",
975
+ "9 B2B, B2B2C 11-50 Mature Undisclosed \n",
976
+ "10 georgia Undisclosed pre-seed 11-500+ \n",
977
+ "11 georgia Undisclosed pre-seed 11-500+ \n",
978
+ "12 georgia Undisclosed pre-seed 11-500+ \n",
979
+ "13 georgia Undisclosed pre-seed 11-500+ \n",
980
+ "14 georgia Undisclosed pre-seed 11-500+ \n",
981
+ "15 georgia Undisclosed pre-seed 11-500+ \n",
982
+ "16 azerbaijan Undisclosed pre-seed 11-500+ \n",
983
+ "17 tajikistan Undisclosed pre-seed 11-500+ \n",
984
+ "18 uzbekistan Undisclosed pre-seed 11-500+ \n",
985
+ "19 uzbekistan Undisclosed pre-seed 11-500+ \n",
986
+ "\n",
987
+ " tags \\\n",
988
+ "0 [connected-vehicles, adas, autonomous-vehicles... \n",
989
+ "1 [sdg, schools, pre-k, serious-games, games, mo... \n",
990
+ "2 [pharmaceuticals, chronic-disease, immunology,... \n",
991
+ "3 [omni-channel, ecommerce, climate-tech, artifi... \n",
992
+ "4 [enterprise-solutions, data-protection, cyber-... \n",
993
+ "5 [time-management, scheduling, calendars, artif... \n",
994
+ "6 [endoscopy, surgery, operating-rooms, optics, ... \n",
995
+ "7 [decision-making, predictive-analytics, cardio... \n",
996
+ "8 [monitoring, digital-healthcare, sleep-disorde... \n",
997
+ "9 [marketing, insurance-companies, bank-infrastr... \n",
998
+ "10 [software, data] \n",
999
+ "11 [social, leisure] \n",
1000
+ "12 [edtech] \n",
1001
+ "13 [software, data] \n",
1002
+ "14 [edtech] \n",
1003
+ "15 [fintech] \n",
1004
+ "16 [software, data] \n",
1005
+ "17 [software, data] \n",
1006
+ "18 [software, data] \n",
1007
+ "19 [software, data] \n",
1008
+ "\n",
1009
+ " country \\\n",
1010
+ "0 Israel \n",
1011
+ "1 Israel \n",
1012
+ "2 Israel \n",
1013
+ "3 Israel \n",
1014
+ "4 Israel \n",
1015
+ "5 Israel \n",
1016
+ "6 Israel \n",
1017
+ "7 Israel \n",
1018
+ "8 Israel \n",
1019
+ "9 Israel \n",
1020
+ "10 [0.017287444323301315, 0.06208805367350578, -0... \n",
1021
+ "11 [-0.00438214186578989, -0.051213208585977554, ... \n",
1022
+ "12 [0.0005468669114634395, -0.05331585183739662, ... \n",
1023
+ "13 [0.021948501467704773, 0.024166792631149292, -... \n",
1024
+ "14 [0.02025573141872883, -0.022812215611338615, -... \n",
1025
+ "15 [0.0306679829955101, -0.010290002450346947, -0... \n",
1026
+ "16 [0.014148630201816559, -0.01890609972178936, -... \n",
1027
+ "17 [0.001473484211601317, 0.008834785781800747, -... \n",
1028
+ "18 [0.017161941155791283, -0.015285761095583439, ... \n",
1029
+ "19 [0.00023191649233922362, -0.005923444870859384... \n",
1030
+ "\n",
1031
+ " source \\\n",
1032
+ "0 https://finder.startupnationcentral.org/ \n",
1033
+ "1 https://finder.startupnationcentral.org/ \n",
1034
+ "2 https://finder.startupnationcentral.org/ \n",
1035
+ "3 https://finder.startupnationcentral.org/ \n",
1036
+ "4 https://finder.startupnationcentral.org/ \n",
1037
+ "5 https://finder.startupnationcentral.org/ \n",
1038
+ "6 https://finder.startupnationcentral.org/ \n",
1039
+ "7 https://finder.startupnationcentral.org/ \n",
1040
+ "8 https://finder.startupnationcentral.org/ \n",
1041
+ "9 https://finder.startupnationcentral.org/ \n",
1042
+ "10 https://www.startupblink.com \n",
1043
+ "11 https://www.startupblink.com \n",
1044
+ "12 https://www.startupblink.com \n",
1045
+ "13 https://www.startupblink.com \n",
1046
+ "14 https://www.startupblink.com \n",
1047
+ "15 https://www.startupblink.com \n",
1048
+ "16 https://www.startupblink.com \n",
1049
+ "17 https://www.startupblink.com \n",
1050
+ "18 https://www.startupblink.com \n",
1051
+ "19 https://www.startupblink.com \n",
1052
+ "\n",
1053
+ " text_vector_ \\\n",
1054
+ "0 [-0.031224824488162994, -0.06342269480228424, ... \n",
1055
+ "1 [-0.038649097084999084, 0.028091922402381897, ... \n",
1056
+ "2 [0.04561534896492958, -0.017776092514395714, 0... \n",
1057
+ "3 [0.0024080690927803516, -0.03042100928723812, ... \n",
1058
+ "4 [-0.01007091999053955, 0.10431888699531555, -0... \n",
1059
+ "5 [0.035849399864673615, 0.04990792274475098, -0... \n",
1060
+ "6 [-0.00110541470348835, 0.011574415490031242, 0... \n",
1061
+ "7 [0.01863308809697628, 0.03877090662717819, -0.... \n",
1062
+ "8 [-0.03323083370923996, -0.006272533442825079, ... \n",
1063
+ "9 [-0.0050485446117818356, 0.030337687581777573,... \n",
1064
+ "10 Undisclosed \n",
1065
+ "11 Undisclosed \n",
1066
+ "12 Undisclosed \n",
1067
+ "13 Undisclosed \n",
1068
+ "14 Undisclosed \n",
1069
+ "15 Undisclosed \n",
1070
+ "16 Undisclosed \n",
1071
+ "17 Undisclosed \n",
1072
+ "18 Undisclosed \n",
1073
+ "19 Undisclosed \n",
1074
+ "\n",
1075
+ " url \n",
1076
+ "0 \n",
1077
+ "1 \n",
1078
+ "2 \n",
1079
+ "3 \n",
1080
+ "4 \n",
1081
+ "5 \n",
1082
+ "6 \n",
1083
+ "7 \n",
1084
+ "8 \n",
1085
+ "9 \n",
1086
+ "10 https://www.digipal.agency/ \n",
1087
+ "11 https://beatbind.io/ \n",
1088
+ "12 https://smartacademy.ge/ \n",
1089
+ "13 https://www.maxinai.com/#all-industries \n",
1090
+ "14 https://www.tlancer.ge/ \n",
1091
+ "15 https://www.mycoins.ge/index.php/main/welcome \n",
1092
+ "16 https://www.atltech.az/az \n",
1093
+ "17 https://zypl.ai/ \n",
1094
+ "18 https://botifi.me/en/ \n",
1095
+ "19 https://smartup.uz/en.html "
1096
+ ]
1097
+ },
1098
+ "execution_count": 88,
1099
+ "metadata": {},
1100
+ "output_type": "execute_result"
1101
+ }
1102
+ ],
1103
+ "source": [
1104
+ "df3"
1105
+ ]
1106
+ },
1107
+ {
1108
+ "cell_type": "code",
1109
+ "execution_count": null,
1110
+ "metadata": {},
1111
+ "outputs": [],
1112
+ "source": [
1113
+ "df = pd.merge(df1, df2, on='A', how='outer')\n",
1114
+ "df"
1115
  ]
1116
  },
1117
  {