vjain committed on
Commit
eeebb7c
1 Parent(s): bc03276

Upload app2.ipynb

Browse files
Files changed (1) hide show
  1. app2.ipynb +202 -0
app2.ipynb ADDED
@@ -0,0 +1,202 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 17,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "import os\n",
10
+ "import gradio as gr\n",
11
+ "import numpy as np\n",
12
+ "import openai\n",
13
+ "import pandas as pd\n",
14
+ "from openai.embeddings_utils import cosine_similarity, get_embedding\n",
15
+ "# The API key is read from the environment; a key committed in a notebook is public and must be revoked.\n",
16
+ "openai.api_key = os.environ[\"OPENAI_API_KEY\"]"
17
+ ]
18
+ },
19
+ {
20
+ "cell_type": "code",
21
+ "execution_count": 73,
22
+ "metadata": {},
23
+ "outputs": [],
24
+ "source": [
25
+ "def similarity(input_text):\n",
26
+ "    \"\"\"Return the `text` of the meg_embeddings.csv row whose embedding is most cosine-similar to `input_text`.\"\"\"\n",
27
+ "    import ast  # function-scope import: only needed to parse the stored vectors\n",
28
+ "    corpus = pd.read_csv(\"meg_embeddings.csv\")\n",
29
+ "    # literal_eval only parses Python literals, so CSV content is never executed (eval would run it).\n",
30
+ "    vectors = corpus[\"embedding\"].apply(ast.literal_eval).apply(np.array)\n",
31
+ "    query_vector = get_embedding(input_text, engine=\"text-embedding-ada-002\")\n",
32
+ "    scores = vectors.apply(lambda vec: cosine_similarity(vec, query_vector))\n",
33
+ "    # idxmax picks the highest-scoring row without sorting the whole frame.\n",
34
+ "    return corpus.loc[scores.idxmax(), \"text\"]"
35
+ ]
36
+ },
37
+ {
38
+ "cell_type": "code",
39
+ "execution_count": 74,
40
+ "metadata": {},
41
+ "outputs": [
42
+ {
43
+ "name": "stdout",
44
+ "output_type": "stream",
45
+ "text": [
46
+ "For example, one participant said they got ‘annoyed by everything’, yet ‘I often don’t even want to like, point out things that technicians are doing that I'm getting annoyed by because I feel like it’s not worth the trouble’\n"
47
+ ]
48
+ }
49
+ ],
50
+ "source": [
51
+ "sample_text = \"\"\"For example, one participant said they got ‘annoyed by everything’, yet ‘I often don’t even want to like, point out things that technicians are doing that I'm getting annoyed by because I feel like it’s not worth the trouble’\"\"\" \n",
52
+ "print(similarity(sample_text))  # smoke test; the saved output shows this same sentence returned\n"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 76,
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "c:\\Users\\vaibh\\anaconda3\\lib\\site-packages\\gradio\\inputs.py:27: UserWarning: Usage of gradio.inputs is deprecated, and will not be supported in the future, please import your component from gradio.components\n",
65
+ " warnings.warn(\n",
66
+ "c:\\Users\\vaibh\\anaconda3\\lib\\site-packages\\gradio\\deprecation.py:40: UserWarning: `optional` parameter is deprecated, and it has no effect\n",
67
+ " warnings.warn(value)\n",
68
+ "c:\\Users\\vaibh\\anaconda3\\lib\\site-packages\\gradio\\deprecation.py:40: UserWarning: `numeric` parameter is deprecated, and it has no effect\n",
69
+ " warnings.warn(value)\n",
70
+ "c:\\Users\\vaibh\\anaconda3\\lib\\site-packages\\gradio\\outputs.py:197: UserWarning: Usage of gradio.outputs is deprecated, and will not be supported in the future, please import your components from gradio.components\n",
71
+ " warnings.warn(\n",
72
+ "c:\\Users\\vaibh\\anaconda3\\lib\\site-packages\\gradio\\deprecation.py:40: UserWarning: The 'type' parameter has been deprecated. Use the Number component instead.\n",
73
+ " warnings.warn(value)\n"
74
+ ]
75
+ },
76
+ {
77
+ "name": "stdout",
78
+ "output_type": "stream",
79
+ "text": [
80
+ "Running on local URL: http://127.0.0.1:7882\n",
81
+ "\n",
82
+ "To create a public link, set `share=True` in `launch()`.\n"
83
+ ]
84
+ },
85
+ {
86
+ "data": {
87
+ "text/html": [
88
+ "<div><iframe src=\"http://127.0.0.1:7882/\" width=\"100%\" height=\"500\" allow=\"autoplay; camera; microphone; clipboard-read; clipboard-write;\" frameborder=\"0\" allowfullscreen></iframe></div>"
89
+ ],
90
+ "text/plain": [
91
+ "<IPython.core.display.HTML object>"
92
+ ]
93
+ },
94
+ "metadata": {},
95
+ "output_type": "display_data"
96
+ },
97
+ {
98
+ "data": {
99
+ "text/plain": []
100
+ },
101
+ "execution_count": 76,
102
+ "metadata": {},
103
+ "output_type": "execute_result"
104
+ }
105
+ ],
106
+ "source": [
107
+ "# gr.inputs / gr.outputs are deprecated namespaces; the components are used directly from gr instead.\n",
108
+ "input_text = gr.Textbox(label=\"Input Text\")\n",
109
+ "output_label = gr.Label(label=\"Similarity Text\")\n",
110
+ "\n",
111
+ "demo = gr.Interface(fn=similarity, inputs=[input_text], outputs=output_label, title=\"Semantic Similarity Checker\", description=\"Check if input text is semantically similar to file saved locally using Ada text embeddings.\")\n",
112
+ "demo.launch()\n"
113
+ ]
114
+ },
115
+ {
116
+ "cell_type": "code",
117
+ "execution_count": 16,
118
+ "metadata": {},
119
+ "outputs": [
120
+ {
121
+ "data": {
122
+ "text/html": [
123
+ "<div>\n",
124
+ "<style scoped>\n",
125
+ " .dataframe tbody tr th:only-of-type {\n",
126
+ " vertical-align: middle;\n",
127
+ " }\n",
128
+ "\n",
129
+ " .dataframe tbody tr th {\n",
130
+ " vertical-align: top;\n",
131
+ " }\n",
132
+ "\n",
133
+ " .dataframe thead th {\n",
134
+ " text-align: right;\n",
135
+ " }\n",
136
+ "</style>\n",
137
+ "<table border=\"1\" class=\"dataframe\">\n",
138
+ " <thead>\n",
139
+ " <tr style=\"text-align: right;\">\n",
140
+ " <th></th>\n",
141
+ " <th>Unnamed: 0.1</th>\n",
142
+ " <th>Unnamed: 0</th>\n",
143
+ " <th>text</th>\n",
144
+ " <th>embedding</th>\n",
145
+ " <th>similarities</th>\n",
146
+ " </tr>\n",
147
+ " </thead>\n",
148
+ " <tbody>\n",
149
+ " <tr>\n",
150
+ " <th>70</th>\n",
151
+ " <td>70</td>\n",
152
+ " <td>70</td>\n",
153
+ " <td>One veterinarian described ‘break[ing] this cy...</td>\n",
154
+ " <td>[-0.021491553634405136, 0.005639533046633005, ...</td>\n",
155
+ " <td>0.837271</td>\n",
156
+ " </tr>\n",
157
+ " </tbody>\n",
158
+ "</table>\n",
159
+ "</div>"
160
+ ],
161
+ "text/plain": [
162
+ " Unnamed: 0.1 Unnamed: 0 \\\n",
163
+ "70 70 70 \n",
164
+ "\n",
165
+ " text \\\n",
166
+ "70 One veterinarian described ‘break[ing] this cy... \n",
167
+ "\n",
168
+ " embedding similarities \n",
169
+ "70 [-0.021491553634405136, 0.005639533046633005, ... 0.837271 "
170
+ ]
171
+ },
172
+ "execution_count": 16,
173
+ "metadata": {},
174
+ "output_type": "execute_result"
175
+ }
176
+ ],
177
+ "source": []
178
+ }
179
+ ],
180
+ "metadata": {
181
+ "kernelspec": {
182
+ "display_name": "base",
183
+ "language": "python",
184
+ "name": "python3"
185
+ },
186
+ "language_info": {
187
+ "codemirror_mode": {
188
+ "name": "ipython",
189
+ "version": 3
190
+ },
191
+ "file_extension": ".py",
192
+ "mimetype": "text/x-python",
193
+ "name": "python",
194
+ "nbconvert_exporter": "python",
195
+ "pygments_lexer": "ipython3",
196
+ "version": "3.9.13"
197
+ },
198
+ "orig_nbformat": 4
199
+ },
200
+ "nbformat": 4,
201
+ "nbformat_minor": 2
202
+ }