{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "hovertemplate": "x=%{x}
y=%{y}", "legendgroup": "", "marker": { "color": "#636efa", "symbol": "circle" }, "mode": "markers", "name": "", "orientation": "v", "showlegend": false, "type": "scatter", "x": [ 0.11065170952653092, 0.9313424805150406, -1.5996490940811126, -0.8519844340509644, -1.7340464584342041, -0.3775897479971143, 0.9342174794849762, -0.3661428747413339, -1.2932493735425519, -0.49093114163634255, -0.3551007466229788, -1.5340231679496692, 0.4078294367610518, -2.1442638646046617, 0.6231023249264844, -0.6998619957260692, -0.1113983771908388, 0.7285629389163905, 0.46044422901665105, 0.6814905041672645, 0.4219703127102215, -0.7752652184393516, 0.7955340314268295, -0.7314030456466243, 0.9872641829556011, -0.6583629187263107, 0.6047801828460544, 0.20295411232272387, -0.3072853007045659, -0.04825357656986248, 0.22843186056424186, 0.3715182306968289, -0.8177510455428024, 0.42432092526509263, 1.3846356216909304, 0.12614132328028024, 2.621758555667932, -1.0520216457461542, -1.9875946687784296, 1.2126073567588975, -0.005340800043309528, 0.6425833097783461, -0.23085575191583718, -0.39870692645269185, 0.22001362060737853, 0.6935952715973245, -0.23899657873918492, -0.2647563455109995, -0.1552225159983857, 0.8367589770008363, 0.6144945885021259, -1.5671191790556918, -0.18360560911852558, 0.6053804394441732, 0.6702519070882618, -1.5371262060244244, -1.8799101652646657, 0.8274294657447646, -0.9778838736457496, -1.32563441413571, -0.47131523078112636, -0.8237529087079579, 0.0749440530096387, -1.1982510124495016, -0.48032273082852944, 1.6803302848658004, -0.1877506957031164, -0.21457440257322793, -1.6357931534618344, 0.06631255614130224, 1.033698385736438, 0.6557679210979364, 0.906246158318645, 0.6040398563966467, 0.3778948128345762, -0.4923550753337614, 0.39072985960174966, 0.9855721759276989, -0.7409752742572114, -1.2737238002887008, 0.22940454505219351, 0.7819262496258474, -0.4288546165934158, 0.8572089084430538, -0.2903102705549142, -0.7319677340190642, 1.2303065042277, -1.9725578060317988, 0.32598692464265033, -0.5876222492318771, 0.8007952391605019, -1.4990544921428144, -0.8627915577138465, -0.6765202184953598, 1.1954685998825016, 0.5950046597901215, 1.975336422109746, -1.5833977844592078, 0.14195434583624839, -0.35137002654826655 ], "xaxis": "x", "y": [ -0.4629101105537382, -0.2328003707850657, 0.08828254314043757, 0.8482893235591087, 0.0771552141212955, 0.45420000337695093, 0.6107289239044659, 0.12055603761846795, -0.7550448080765189, -1.838932065060547, 0.4806534709489732, 1.151346161003326, 0.2478735044883051, 0.06641255531994601, -0.5489097568589677, -0.4973455322789025, 0.4724629696550256, 1.3080386645719073, -1.0194257866836292, 1.3661003420325573, 1.4517050942871168, 2.2756191195760964, 0.6866903934626691, 0.05090384642811877, -1.042704891086457, -0.3320134837963288, -1.0765856360639627, 0.6605128494068281, -1.128281682848767, -0.7553027387022911, -0.29547549213277924, -0.33700831200450676, -0.10941772118430755, 1.68310769353848, -0.19229315857114024, 0.8294415908956625, -1.3417039289417374, -0.7048263733247031, 1.168681096473467, -0.8932875274711288, 1.5922472197868605, -1.066334442512193, 0.5498063882137004, 1.7803337014200828, 0.4327588386400595, -1.82700153607581, -0.2847320646428013, -1.2718678778713373, 0.030647100939450736, -0.3726484952087042, 1.24677793821802, -0.9627886538790479, 0.053176481171674006, 0.21228159481404182, 2.2210778206099895, -0.3210863506096476, -0.13099360273041932, -1.3584896597178582, 2.6607319250019947, -0.41318978236361714, -1.0836282188237547, -0.4870997733370743, -0.9613820399144345, 0.8207835881444858, 1.1732660854078711, -0.0695355567682757, -1.26447561478453, -0.5190975876980604, 0.4607032579572695, -0.09378808026636432, -0.12440072801979105, 1.8745088793182847, 0.454356329723802, 1.359892435465479, -0.01437712091152146, -0.8529776587086099, -0.4959037185693348, -0.3351558645302966, 0.041836528406660786, 1.4252605845255253, -0.07426595321119726, 0.9202368920501296, -0.5671337897282676, -0.600754544014511, 1.3359736900383594, 0.16076219666418068, 0.6872459440118796, 0.7243820452672731, 0.24446435572716743, -1.1772300880788433, 1.892655561835316, 1.185630814982556, -0.8623991303777663, -1.5808521340820116, 0.23459772035678836, 0.4098229267414775, -0.4550057901832996, -0.2261330459822914, 0.23013128978783395, 0.7516276562909543 ], "yaxis": "y" }, { "marker": { "color": "red", "size": 10 }, "mode": "markers", "type": "scatter", "x": [ -0.1552225159983857, 1.975336422109746, -1.2737238002887008, -0.8627915577138465, -1.6357931534618344, -0.6583629187263107, 0.8007952391605019, -0.4923550753337614, 0.14195434583624839, 0.6231023249264844 ], "y": [ 0.030647100939450736, -0.4550057901832996, 1.4252605845255253, -0.8623991303777663, 0.4607032579572695, -0.3320134837963288, 1.892655561835316, -0.8529776587086099, 0.23013128978783395, -0.5489097568589677 ] } ], "layout": { "legend": { "tracegroupgap": 0 }, "margin": { "t": 60 }, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "fillpattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "xaxis": { "anchor": "y", "domain": [ 0, 1 ], "title": { "text": "x" } }, "yaxis": { "anchor": "x", "domain": [ 0, 1 ], "title": { "text": "y" } } } } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import numpy as np\n", "import plotly.express as px\n", "\n", "def farthest_first_traversal(points, k, return_type='indices'):\n", " if return_type == 'values': \n", " # randomly select first point from the dataset\n", " centers = [points[np.random.randint(0, len(points))]]\n", " \n", " while len(centers) < k:\n", " # compute distances from all points to the selected centers\n", " distances = np.array([min([np.linalg.norm(p-c) for c in centers]) for p in points])\n", " \n", " # select the point farthest away from the centers\n", " farthest = points[np.argmax(distances)]\n", " \n", " # add the farthest point to the selected centers\n", " centers.append(farthest)\n", " \n", " return np.array(centers) #given a list, directly returns values\n", "\n", " elif return_type == 'indices':\n", " # randomly select first point from the dataset\n", " centers = [np.random.randint(0, len(points))]\n", " indices = [centers[0]]\n", " \n", " while len(centers) < k:\n", " # compute distances from all points to the selected centers\n", " distances = np.array([min([np.linalg.norm(points[p]-points[c]) for c in centers]) for p in range(len(points))])\n", " \n", " # select the point farthest away from the centers\n", " farthest = np.argmax(distances)\n", " \n", " # add the farthest point to the selected centers\n", " centers.append(farthest)\n", " indices.append(farthest)\n", " \n", " return np.array(indices) #given a list, returns the indices of the list\n", "\n", "# generate sample data\n", "# np.random.seed(42)\n", "points = np.random.randn(100, 20) #last param is the n. dimensions\n", "\n", "centers = farthest_first_traversal(points, k=10, return_type='values')\n", "# visualize results using Plotly\n", "fig = px.scatter(x=points[:,0], y=points[:,1])\n", "fig.add_scatter(x=centers[:,0], y=centers[:,1], mode='markers', marker=dict(size=10, color='red'))\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\onnx\\_internal\\_beartype.py:30: UserWarning: module 'beartype.roar' has no attribute 'BeartypeDecorHintPep585DeprecationWarning'\n", " warnings.warn(f\"{e}\")\n" ] } ], "source": [ "import os\n", "# os.system('pip install openpyxl')\n", "# os.system('pip install sentence-transformers')\n", "import pandas as pd\n", "import gradio as gr\n", "from sentence_transformers import SentenceTransformer\n", "\n", "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n", "\n", "df = pd.read_parquet('df_encoded3.parquet')\n", "df['tags'] = df['tags'].apply(lambda x : str(x))\n", "def parse_raised(x):\n", " if x == 'Undisclosed':\n", " return 0\n", " else: \n", " quantifier = x[-1]\n", " x = float(x[1:-1])\n", " if quantifier == 'K':\n", " return x/1000\n", " elif quantifier == 'M':\n", " return x\n", "df['raised'] = df['raised'].apply(lambda x : parse_raised(x))\n", "df['stage'] = df['stage'].apply(lambda x : x.lower())\n", "df = df.reset_index(drop=True)\n", "\n", "from sklearn.neighbors import NearestNeighbors\n", "import pandas as pd\n", "from sentence_transformers import SentenceTransformer\n", "\n", "nbrs = NearestNeighbors(n_neighbors=5000, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n", "\n", "def search(df, query):\n", " product = model.encode(query).tolist()\n", " # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n", "\n", " #prepare model\n", " # \n", " distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n", "\n", " #print out the description of every recommended product\n", " return df.iloc[list(indices)[0]][['name', 'raised', 'target', 'size', 'stage', 'country', 'source', 'description', 'tags', 'text_vector_']]\n", "\n", "def filter_df(df, column_name, filter_type, filter_value, minimum_acceptable_size=0):\n", " if filter_type == '==':\n", " df_filtered = df[df[column_name]==filter_value]\n", " elif filter_type == '>=':\n", " df_filtered = df[df[column_name]>=filter_value]\n", " elif filter_type == '<=':\n", " df_filtered = df[df[column_name]<=filter_value]\n", " elif filter_type == 'contains':\n", " df_filtered = df[df['target'].str.contains(filter_value)]\n", "\n", " if df_filtered.size >= minimum_acceptable_size:\n", " return df_filtered\n", " else:\n", " return df\n", "\n", "import pandas as pd\n", "import numpy as np\n", "from sentence_transformers import SentenceTransformer\n", "from sklearn.metrics.pairwise import cosine_similarity\n", "\n", "def score_filter(df, query, min_score):\n", " # Define function to compute cosine similarity between two vectors\n", " def cosine_sim(query, vector):\n", " return cosine_similarity([query], [vector])[0][0]\n", "\n", " # df_results = search(df, 'age reversing')[0:50]\n", " vector_col = np.array(df['text_vector_'].tolist())\n", "\n", " # Define query vector\n", " query = model.encode([query])[0]\n", "\n", " # Compute cosine similarity between query vector and every sample vector\n", " df['similarity'] = np.apply_along_axis(cosine_sim, 1, vector_col, query)\n", " df = df[df['similarity']>=min_score]\n", " return df" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([['Klogene Therapeutics, Inc.',\n", " 'Prevention and treatment of age related diseases'],\n", " ['Inverse',\n", " 'Inverse is changing the way women all around the world condition and care for their hair.'],\n", " ['AGELON М',\n", " 'Agelon M is the service of online investigations based on automated targeting, verification and surveying respondents in social networks.'],\n", " ['Age of Learning',\n", " 'Age of Learning blends education best practices, innovative technology, and insightful creativity to bring learning to life for children across the U.S. and around the world.'],\n", " ['Aprilage Inc',\n", " 'Seeing IS believing! Aprilage develops visualization software that shows people their \"future self\" and how their lifestyle of today will affect how they will look as they age. Our software, APRIL®, is currently used by 500 health providers, educators and insurers in more than 25 countries as a tool for health education about chronic disease prevention and behavior modification. APRIL® helps them demonstrate to their patients and clients the consequences of certain health and lifestyle choices (e.g. obesity, smoking, heavy sun exposure). Our clients tell us it works because it uses the best health motivator ever created – their own appearance. Two new APRIL® products are in final development for market launch - one for those engaged in the prevention and education about obesity, and the other for those in the aesthetics markets (plastic surgeons, dermatologists, and cosmetic companies).']],\n", " dtype=object)" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_results = search(df, 'age-reversing')[0:20]\n", "points = df_results['text_vector_'].values\n", "indices = farthest_first_traversal(points, k=5, return_type='indices')\n", "df_results.iloc[indices][['name', 'description']].values" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [], "source": [ "import requests\n", "\n", "def gpt3_question(api_key, prompt):\n", " api_endpoint = \"https://api.openai.com/v1/engines/text-davinci-003/completions\"\n", " headers = {\n", " \"Content-Type\": \"application/json\",\n", " \"Authorization\": f\"Bearer {api_key}\"\n", " }\n", " data = {\n", " \"prompt\": prompt,\n", " \"max_tokens\": 500,\n", " \"temperature\": 0.7\n", " }\n", " print('sending request')\n", " response = requests.post(api_endpoint, headers=headers, json=data)\n", " print(response.text)\n", " generated_text = response.json()[\"choices\"][0][\"text\"]\n", "\n", " return generated_text\n", "\n", "def competitor_analysis_foo(startup_array, max_paragraphs):\n", " prompt = f\"\"\"\n", " {str(startup_array)}\n", " This is a list of startups in the following format: [name, stage, description]:\n", "\n", " Write a {max_paragraphs} paragraph competitors analysis based on this data. Do not name the paragraphs.\n", " \"\"\"\n", " #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\n", " response = gpt3_question('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', prompt)\n", "\n", " for x in range(10):\n", " response = response.replace(f'Paragraph {x}:', '')\n", " response = response.replace(f'Paragraph {x}', '')\n", " response = response.replace('\\n\\n', '\\n').strip()\n", "\n", " # with open('competitor_analysis.txt', 'w') as file:\n", " # file.write(response)\n", " return response" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7901\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" }, { "name": "stdout", "output_type": "stream", "text": [ "sending request\n", "{\"id\":\"cmpl-734OdouEI70awj0YgzW2v9fLQCusE\",\"object\":\"text_completion\",\"created\":1680966239,\"model\":\"text-davinci-003\",\"choices\":[{\"text\":\"\\n Paragraph 1: \\n There appears to be a large focus on the pre-seed stage in this list of startups. AgeRate, Klogene Therapeutics, Inc., Modern Age, Age Labs, Assured Allies, Spring Discovery, AgeNation, and Elevian are all pre-seed startups that provide a variety of services related to aging. AgeRate and Age Labs are focused on providing accurate and affordable epigenetic tests to reveal a person’s biological age. Klogene Therapeutics, Inc. is focused on prevention and treatment of age-related diseases. Modern Age is a health and wellness platform designed to make the journey of aging more manageable. Assured Allies is a company focused on successful aging. Spring Discovery is focused on drug discovery for age-related diseases, and AgeNation is a digital media company for baby boomers and seniors. Lastly, Elevian is a molecular diagnostics company that seeks to reduce the cost of developing drugs by accurately measuring biological age.\\n \\n Paragraph 2:\\n In addition, there is one startup in the C stage, Youth Laboratories, which is focused on machine vision and artificial intelligence for beauty and healthy longevity. This company specializes in processing digital images and videos to develop diseases biomarkers and to evaluate the physical well-being of a human and lifestyle. By harnessing the power of artificial intelligence, they seek to slow down or even reverse age-associated changes, particularly those related to skin. \\n \\n Paragraph 3:\\n Taken together, these startups provide a comprehensive range of services related to aging, from molecular diagnostics and drug discovery to digital media and artificial intelligence. They offer both prevention and treatment of age-related diseases, as well as ways to measure and manage the aging process. With the potential for these companies to make a positive impact on the aging process, it is likely that competition between them will increase in the future.\",\"index\":0,\"logprobs\":null,\"finish_reason\":\"stop\"}],\"usage\":{\"prompt_tokens\":482,\"completion_tokens\":392,\"total_tokens\":874}}\n", "\n" ] } ], "source": [ "#the first module becomes text1, the second module file1\n", "def vector_search(size, target, stage, query, var_metadata, var_fresh): #greet('11-500+', 'B2B', 'pre-seed', 'age-reversing')\n", " def raised_zero(x):\n", " if x == 0:\n", " return 'Undisclosed'\n", " else:\n", " return x\n", " df_knn = search(df, query)\n", " df_knn['raised'] = df_knn['raised'].apply(lambda x : raised_zero(x))\n", "\n", " df_size = filter_df(df_knn, 'size', '==', size, 1)\n", "\n", " if stage != 'ALL':\n", " df_stage = filter_df(df_size, 'stage', '==', stage.lower(), 1)\n", " else:\n", " #we bypass the filter\n", " df_stage = df_size\n", "\n", " df_target = filter_df(df_stage, 'target', 'contains', target, 1)\n", " \n", " # display(df_stage)\n", " # df_raised = df_target[(df_target['raised'] >= raised) | (df_target['raised'] == 0)]\n", "\n", " return df_target.drop('text_vector_', axis=1)[0:100], df_target[0:100], True #.sort_values('raised', ascending=False)\n", "\n", "def write_competitor_analysis(var_metadata, query, var_fresh):\n", "\n", " if var_fresh == True:\n", " df_final = score_filter(var_metadata, query, 0.35)\n", " df_final = df_final[['name', 'stage', 'description']][0:10].values.tolist()\n", "\n", " if len(df_final) == 0:\n", " # df_final = df_final[['name', 'stage', 'description']][0:3].values.tolist()\n", " # response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=1)\n", " response = 'score too low to output valid results'\n", " if len(df_final) >= 1 and len(df_final) <= 3:\n", " response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=1)\n", " elif len(df_final) > 3 and len(df_final) <= 5:\n", " response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=2)\n", " elif len(df_final) > 6:\n", " response = competitor_analysis_foo(startup_array=df_final, max_paragraphs=3)\n", "\n", " return response, False #we reset fresh state\n", "\n", " else:\n", " return 'Perform a new Startup Search first', False #we reset fresh state\n", "\n", "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n", " gr.Markdown(\n", " \"\"\"\n", " # Startup Search Engine\n", " \"\"\"\n", " )\n", " var_fresh = gr.Variable(value=False)\n", " var_metadata = gr.Variable(value=0)\n", " var_query = gr.Variable(value=0)\n", " size = gr.Radio(['1-10', '11-50', '51-200', '201-500', '500+', '11-500+'], multiselect=False, value='11-500+', label='size')\n", " target = gr.Radio(['B2B', 'B2C', 'B2G', 'B2B2C'], multiselect=False, value='B2B', label='target')\n", " stage = gr.Radio(['pre-seed', 'A', 'B', 'C', 'ALL'], multiselect=False, value='ALL', label='stage')\n", " # raised = gr.Slider(0, 20, value=5, step_size=1, label=\"Minimum raising (in Millions)\")\n", " query = gr.Textbox(label='Describe the Startup you are searching for', value='age reversing')\n", " # competitor_analysis = gr.Radio(['write', 'do not write'], multiselect=False, value='do not write', label='write a competitor analysis')\n", "\n", " btn2 = gr.Button(value=\"Search for a Startup\")\n", " btn1 = gr.Button(value=\"Write a competitor analysis\")\n", " \n", " output1 = gr.Textbox(label='competitor analysis')\n", " output2 = gr.DataFrame(label='value')\n", "\n", " btn1.click(write_competitor_analysis, [var_metadata, query, var_fresh], [output1, var_fresh]) #competitor analysis\n", " btn2.click(vector_search, [size, target, stage, query, var_metadata, var_fresh], [output2, var_metadata, var_fresh]) #startup search\n", "\n", "demo.launch(share=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "fdf377d643bc1cb065454f0ad2ceac75d834452ecf289e7ba92c6b3f59a7cee1" } } }, "nbformat": 4, "nbformat_minor": 2 }