{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# !pip install sentence-transformers==2.0.0" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\torch\\onnx\\_internal\\_beartype.py:30: UserWarning: module 'beartype.roar' has no attribute 'BeartypeDecorHintPep585DeprecationWarning'\n", " warnings.warn(f\"{e}\")\n" ] } ], "source": [ "import pandas as pd\n", "from tqdm import tqdm\n", "from sentence_transformers import SentenceTransformer\n", "\n", "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "df = pd.read_parquet('df_encoded.parquet')\n", "df" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "from sklearn.neighbors import NearestNeighbors\n", "import numpy as np\n", "import pandas as pd\n", "\n", "from sentence_transformers import SentenceTransformer\n", "\n", "model = SentenceTransformer('all-mpnet-base-v2') #all-MiniLM-L6-v2 #all-mpnet-base-v2\n", "\n", "#prepare model\n", "# nbrs = NearestNeighbors(n_neighbors=8, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
shortNamelocationtitlehourlyRateavgFeedbackScoredescription
3761Steven F.New YorkDatabase Manger / Graphics / Social Media35.04.939631A highly skilled problem solver​ with over 10 ...
835Jacquelyn N.New YorkAdmin support specialist12.04.920992I am a Kansas City based Virtual Assistant. I ...
2787Mark H.New YorkWordPress Specialist - Development, Administra...60.04.751762Top Rated Plus | Specialize in WordPress | Inv...
3402Carleton C.New YorkExpert freelancer with skills in Divi theme, C...25.04.692159For over 30 years, I have developed a wide ran...
1156Andee F.New YorkExperienced Freelancer15.04.645855I have 8+ years of successfully providing admi...
1556Laura O.New YorkAdmin Expert with experience in Microsoft Suit...30.04.620818I have been passionate about my personal budge...
1002Nicole H.New YorkExperienced admin support and customer support...30.04.129972I'm an experienced jack of all trades. I have...
1626Drew L.New YorkFront End Web Developer50.00.000000Worked for agency with big name clients doing ...
\n", "
" ], "text/plain": [ " shortName location \\\n", "3761 Steven F. New York \n", "835 Jacquelyn N. New York \n", "2787 Mark H. New York \n", "3402 Carleton C. New York \n", "1156 Andee F. New York \n", "1556 Laura O. New York \n", "1002 Nicole H. New York \n", "1626 Drew L. New York \n", "\n", " title hourlyRate \\\n", "3761 Database Manger / Graphics / Social Media 35.0 \n", "835 Admin support specialist 12.0 \n", "2787 WordPress Specialist - Development, Administra... 60.0 \n", "3402 Expert freelancer with skills in Divi theme, C... 25.0 \n", "1156 Experienced Freelancer 15.0 \n", "1556 Admin Expert with experience in Microsoft Suit... 30.0 \n", "1002 Experienced admin support and customer support... 30.0 \n", "1626 Front End Web Developer 50.0 \n", "\n", " avgFeedbackScore description \n", "3761 4.939631 A highly skilled problem solver​ with over 10 ... \n", "835 4.920992 I am a Kansas City based Virtual Assistant. I ... \n", "2787 4.751762 Top Rated Plus | Specialize in WordPress | Inv... \n", "3402 4.692159 For over 30 years, I have developed a wide ran... \n", "1156 4.645855 I have 8+ years of successfully providing admi... \n", "1556 4.620818 I have been passionate about my personal budge... \n", "1002 4.129972 I'm an experienced jack of all trades. I have... \n", "1626 0.000000 Worked for agency with big name clients doing ... " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def filter_df(df, column_name, filter_type, filter_value):\n", " if filter_type == '==':\n", " df_filtered = df[df[column_name]==filter_value]\n", " elif filter_type == '<=':\n", " df_filtered = df[df[column_name]<=filter_value]\n", " return df_filtered\n", "\n", "def search(df, query):\n", " product = model.encode(query).tolist()\n", " # product = df.iloc[0]['text_vector_'] #use one of the products as sample\n", "\n", " nbrs = NearestNeighbors(n_neighbors=8, algorithm='ball_tree').fit(df['text_vector_'].values.tolist())\n", " distances, indices = nbrs.kneighbors([product]) #input the vector of the reference object\n", "\n", " #print out the description of every recommended product\n", " df_search = df.iloc[list(indices)[0]].drop(['skills', 'text_vector_'], axis=1).sort_values('avgFeedbackScore', ascending=False)\n", "\n", " return df_search[['shortName', 'location', 'title', 'hourlyRate', 'avgFeedbackScore', 'description']]\n", "\n", "# search('I want to hire a person who does both backend and')\n", "\n", "df_location = filter_df(df, 'location', '==', 'New York')\n", "df_price = filter_df(df_location, 'hourlyRate', '<=', 80)\n", "search(df_price, 'I want to hire a person who does both backend and')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "import gradio as gr\n", "import os\n", "\n", "#the first module becomes text1, the second module file1\n", "def greet(price, location, query):\n", " # df1 = \n", " df_location = filter_df(df, 'location', '==', location)\n", " df_price = filter_df(df_location, 'hourlyRate', '<=', price)\n", " df_search = search(df_price, query)\n", " return df_search" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Slider, please remove them: {'step_size': 5}\n", " warnings.warn(\n", "c:\\Users\\ardit\\AppData\\Local\\Programs\\Python\\Python39\\lib\\site-packages\\gradio\\deprecation.py:43: UserWarning: You have unused kwarg parameters in Radio, please remove them: {'multiselect': False}\n", " warnings.warn(\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Running on local URL: http://127.0.0.1:7906\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "with gr.Blocks(theme=gr.themes.Soft(primary_hue='amber', secondary_hue='gray', neutral_hue='amber')) as demo:\n", " gr.Markdown(\n", " \"\"\"\n", " # Freelancer Upwork Search\n", " \"\"\"\n", " )\n", " input1 = gr.Slider(20, 120, value=90, step_size=5, label=\"Max Hourly Rate\")\n", " input2 = gr.Radio(['New York', 'Chicago', 'Washington'], multiselect=False, label='State', value='New York')\n", " input3 = gr.Textbox(label='Query', value='I want to develop a mobile app')\n", "\n", " btn = gr.Button(value=\"Search for Product\")\n", " output = gr.Dataframe()\n", " # btn.click(greet, inputs='text', outputs=['dataframe'])\n", " btn.click(greet, [input1, input2, input3], [output])\n", "demo.launch(share=False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# iface = gr.Interface(\n", "# fn=greet,\n", "# inputs=[\n", "# gr.Slider(20, 120, value=20, step_size=5, label=\"Max Hourly Rate\"),\n", "# gr.Radio(\n", "# ['New York', 'Chicago', 'Washington'], multiselect=False, label='State', value='New York'\n", "# ),\n", "# gr.Textbox(label='Input', value='I want to develop a mobile app')\n", "# ],\n", "# outputs=[\"dataframe\"])\n", "# iface.launch(share=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3.9.0 64-bit", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.13" }, "orig_nbformat": 4, "vscode": { "interpreter": { "hash": "fdf377d643bc1cb065454f0ad2ceac75d834452ecf289e7ba92c6b3f59a7cee1" } } }, "nbformat": 4, "nbformat_minor": 2 }